      1 /*===---- arm_neon.h - ARM Neon intrinsics ---------------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __ARM_NEON_H
     25 #define __ARM_NEON_H
     26 
     27 #if !defined(__ARM_NEON)
     28 #error "NEON support not enabled"
     29 #endif
     30 
     31 #include <stdint.h>
     32 
     33 typedef float float32_t;
     34 typedef __fp16 float16_t;
     35 #ifdef __aarch64__
     36 typedef double float64_t;
     37 #endif
     38 
     39 #ifdef __aarch64__
     40 typedef uint8_t poly8_t;
     41 typedef uint16_t poly16_t;
     42 typedef uint64_t poly64_t;
     43 typedef __uint128_t poly128_t;
     44 #else
     45 typedef int8_t poly8_t;
     46 typedef int16_t poly16_t;
     47 #endif
     48 typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
     49 typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
     50 typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
     51 typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
     52 typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
     53 typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
     54 typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
     55 typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
     56 typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
     57 typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
     58 typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
     59 typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
     60 typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
     61 typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
     62 typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
     63 typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
     64 typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
     65 typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
     66 typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
     67 typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
     68 #ifdef __aarch64__
     69 typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
     70 typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
     71 #endif
     72 typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
     73 typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
     74 typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
     75 typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
     76 #ifdef __aarch64__
     77 typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t;
     78 typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t;
     79 #endif
     80 
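/* Naming convention: each vector type encodes its element width and lane
 * count, e.g. int8x8_t is eight 8-bit lanes (a 64-bit NEON register) and
 * int8x16_t is sixteen lanes (a 128-bit register).  The intNxMxK_t structs
 * below simply bundle K such vectors in a val[K] array; they are the
 * argument/return types of the de-interleaving load/store intrinsics
 * (vld2/vld3/vld4 and their stores, defined later in the full header). */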
     81 typedef struct int8x8x2_t {
     82   int8x8_t val[2];
     83 } int8x8x2_t;
     84 
     85 typedef struct int8x16x2_t {
     86   int8x16_t val[2];
     87 } int8x16x2_t;
     88 
     89 typedef struct int16x4x2_t {
     90   int16x4_t val[2];
     91 } int16x4x2_t;
     92 
     93 typedef struct int16x8x2_t {
     94   int16x8_t val[2];
     95 } int16x8x2_t;
     96 
     97 typedef struct int32x2x2_t {
     98   int32x2_t val[2];
     99 } int32x2x2_t;
    100 
    101 typedef struct int32x4x2_t {
    102   int32x4_t val[2];
    103 } int32x4x2_t;
    104 
    105 typedef struct int64x1x2_t {
    106   int64x1_t val[2];
    107 } int64x1x2_t;
    108 
    109 typedef struct int64x2x2_t {
    110   int64x2_t val[2];
    111 } int64x2x2_t;
    112 
    113 typedef struct uint8x8x2_t {
    114   uint8x8_t val[2];
    115 } uint8x8x2_t;
    116 
    117 typedef struct uint8x16x2_t {
    118   uint8x16_t val[2];
    119 } uint8x16x2_t;
    120 
    121 typedef struct uint16x4x2_t {
    122   uint16x4_t val[2];
    123 } uint16x4x2_t;
    124 
    125 typedef struct uint16x8x2_t {
    126   uint16x8_t val[2];
    127 } uint16x8x2_t;
    128 
    129 typedef struct uint32x2x2_t {
    130   uint32x2_t val[2];
    131 } uint32x2x2_t;
    132 
    133 typedef struct uint32x4x2_t {
    134   uint32x4_t val[2];
    135 } uint32x4x2_t;
    136 
    137 typedef struct uint64x1x2_t {
    138   uint64x1_t val[2];
    139 } uint64x1x2_t;
    140 
    141 typedef struct uint64x2x2_t {
    142   uint64x2_t val[2];
    143 } uint64x2x2_t;
    144 
    145 typedef struct float16x4x2_t {
    146   float16x4_t val[2];
    147 } float16x4x2_t;
    148 
    149 typedef struct float16x8x2_t {
    150   float16x8_t val[2];
    151 } float16x8x2_t;
    152 
    153 typedef struct float32x2x2_t {
    154   float32x2_t val[2];
    155 } float32x2x2_t;
    156 
    157 typedef struct float32x4x2_t {
    158   float32x4_t val[2];
    159 } float32x4x2_t;
    160 
    161 #ifdef __aarch64__
    162 typedef struct float64x1x2_t {
    163   float64x1_t val[2];
    164 } float64x1x2_t;
    165 
    166 typedef struct float64x2x2_t {
    167   float64x2_t val[2];
    168 } float64x2x2_t;
    169 
    170 #endif
    171 typedef struct poly8x8x2_t {
    172   poly8x8_t val[2];
    173 } poly8x8x2_t;
    174 
    175 typedef struct poly8x16x2_t {
    176   poly8x16_t val[2];
    177 } poly8x16x2_t;
    178 
    179 typedef struct poly16x4x2_t {
    180   poly16x4_t val[2];
    181 } poly16x4x2_t;
    182 
    183 typedef struct poly16x8x2_t {
    184   poly16x8_t val[2];
    185 } poly16x8x2_t;
    186 
    187 #ifdef __aarch64__
    188 typedef struct poly64x1x2_t {
    189   poly64x1_t val[2];
    190 } poly64x1x2_t;
    191 
    192 typedef struct poly64x2x2_t {
    193   poly64x2_t val[2];
    194 } poly64x2x2_t;
    195 
    196 #endif
    197 typedef struct int8x8x3_t {
    198   int8x8_t val[3];
    199 } int8x8x3_t;
    200 
    201 typedef struct int8x16x3_t {
    202   int8x16_t val[3];
    203 } int8x16x3_t;
    204 
    205 typedef struct int16x4x3_t {
    206   int16x4_t val[3];
    207 } int16x4x3_t;
    208 
    209 typedef struct int16x8x3_t {
    210   int16x8_t val[3];
    211 } int16x8x3_t;
    212 
    213 typedef struct int32x2x3_t {
    214   int32x2_t val[3];
    215 } int32x2x3_t;
    216 
    217 typedef struct int32x4x3_t {
    218   int32x4_t val[3];
    219 } int32x4x3_t;
    220 
    221 typedef struct int64x1x3_t {
    222   int64x1_t val[3];
    223 } int64x1x3_t;
    224 
    225 typedef struct int64x2x3_t {
    226   int64x2_t val[3];
    227 } int64x2x3_t;
    228 
    229 typedef struct uint8x8x3_t {
    230   uint8x8_t val[3];
    231 } uint8x8x3_t;
    232 
    233 typedef struct uint8x16x3_t {
    234   uint8x16_t val[3];
    235 } uint8x16x3_t;
    236 
    237 typedef struct uint16x4x3_t {
    238   uint16x4_t val[3];
    239 } uint16x4x3_t;
    240 
    241 typedef struct uint16x8x3_t {
    242   uint16x8_t val[3];
    243 } uint16x8x3_t;
    244 
    245 typedef struct uint32x2x3_t {
    246   uint32x2_t val[3];
    247 } uint32x2x3_t;
    248 
    249 typedef struct uint32x4x3_t {
    250   uint32x4_t val[3];
    251 } uint32x4x3_t;
    252 
    253 typedef struct uint64x1x3_t {
    254   uint64x1_t val[3];
    255 } uint64x1x3_t;
    256 
    257 typedef struct uint64x2x3_t {
    258   uint64x2_t val[3];
    259 } uint64x2x3_t;
    260 
    261 typedef struct float16x4x3_t {
    262   float16x4_t val[3];
    263 } float16x4x3_t;
    264 
    265 typedef struct float16x8x3_t {
    266   float16x8_t val[3];
    267 } float16x8x3_t;
    268 
    269 typedef struct float32x2x3_t {
    270   float32x2_t val[3];
    271 } float32x2x3_t;
    272 
    273 typedef struct float32x4x3_t {
    274   float32x4_t val[3];
    275 } float32x4x3_t;
    276 
    277 #ifdef __aarch64__
    278 typedef struct float64x1x3_t {
    279   float64x1_t val[3];
    280 } float64x1x3_t;
    281 
    282 typedef struct float64x2x3_t {
    283   float64x2_t val[3];
    284 } float64x2x3_t;
    285 
    286 #endif
    287 typedef struct poly8x8x3_t {
    288   poly8x8_t val[3];
    289 } poly8x8x3_t;
    290 
    291 typedef struct poly8x16x3_t {
    292   poly8x16_t val[3];
    293 } poly8x16x3_t;
    294 
    295 typedef struct poly16x4x3_t {
    296   poly16x4_t val[3];
    297 } poly16x4x3_t;
    298 
    299 typedef struct poly16x8x3_t {
    300   poly16x8_t val[3];
    301 } poly16x8x3_t;
    302 
    303 #ifdef __aarch64__
    304 typedef struct poly64x1x3_t {
    305   poly64x1_t val[3];
    306 } poly64x1x3_t;
    307 
    308 typedef struct poly64x2x3_t {
    309   poly64x2_t val[3];
    310 } poly64x2x3_t;
    311 
    312 #endif
    313 typedef struct int8x8x4_t {
    314   int8x8_t val[4];
    315 } int8x8x4_t;
    316 
    317 typedef struct int8x16x4_t {
    318   int8x16_t val[4];
    319 } int8x16x4_t;
    320 
    321 typedef struct int16x4x4_t {
    322   int16x4_t val[4];
    323 } int16x4x4_t;
    324 
    325 typedef struct int16x8x4_t {
    326   int16x8_t val[4];
    327 } int16x8x4_t;
    328 
    329 typedef struct int32x2x4_t {
    330   int32x2_t val[4];
    331 } int32x2x4_t;
    332 
    333 typedef struct int32x4x4_t {
    334   int32x4_t val[4];
    335 } int32x4x4_t;
    336 
    337 typedef struct int64x1x4_t {
    338   int64x1_t val[4];
    339 } int64x1x4_t;
    340 
    341 typedef struct int64x2x4_t {
    342   int64x2_t val[4];
    343 } int64x2x4_t;
    344 
    345 typedef struct uint8x8x4_t {
    346   uint8x8_t val[4];
    347 } uint8x8x4_t;
    348 
    349 typedef struct uint8x16x4_t {
    350   uint8x16_t val[4];
    351 } uint8x16x4_t;
    352 
    353 typedef struct uint16x4x4_t {
    354   uint16x4_t val[4];
    355 } uint16x4x4_t;
    356 
    357 typedef struct uint16x8x4_t {
    358   uint16x8_t val[4];
    359 } uint16x8x4_t;
    360 
    361 typedef struct uint32x2x4_t {
    362   uint32x2_t val[4];
    363 } uint32x2x4_t;
    364 
    365 typedef struct uint32x4x4_t {
    366   uint32x4_t val[4];
    367 } uint32x4x4_t;
    368 
    369 typedef struct uint64x1x4_t {
    370   uint64x1_t val[4];
    371 } uint64x1x4_t;
    372 
    373 typedef struct uint64x2x4_t {
    374   uint64x2_t val[4];
    375 } uint64x2x4_t;
    376 
    377 typedef struct float16x4x4_t {
    378   float16x4_t val[4];
    379 } float16x4x4_t;
    380 
    381 typedef struct float16x8x4_t {
    382   float16x8_t val[4];
    383 } float16x8x4_t;
    384 
    385 typedef struct float32x2x4_t {
    386   float32x2_t val[4];
    387 } float32x2x4_t;
    388 
    389 typedef struct float32x4x4_t {
    390   float32x4_t val[4];
    391 } float32x4x4_t;
    392 
    393 #ifdef __aarch64__
    394 typedef struct float64x1x4_t {
    395   float64x1_t val[4];
    396 } float64x1x4_t;
    397 
    398 typedef struct float64x2x4_t {
    399   float64x2_t val[4];
    400 } float64x2x4_t;
    401 
    402 #endif
    403 typedef struct poly8x8x4_t {
    404   poly8x8_t val[4];
    405 } poly8x8x4_t;
    406 
    407 typedef struct poly8x16x4_t {
    408   poly8x16_t val[4];
    409 } poly8x16x4_t;
    410 
    411 typedef struct poly16x4x4_t {
    412   poly16x4_t val[4];
    413 } poly16x4x4_t;
    414 
    415 typedef struct poly16x8x4_t {
    416   poly16x8_t val[4];
    417 } poly16x8x4_t;
    418 
    419 #ifdef __aarch64__
    420 typedef struct poly64x1x4_t {
    421   poly64x1_t val[4];
    422 } poly64x1x4_t;
    423 
    424 typedef struct poly64x2x4_t {
    425   poly64x2_t val[4];
    426 } poly64x2x4_t;
    427 
    428 #endif
    429 
    430 #define __ai static inline __attribute__((__always_inline__, __nodebug__))
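/* __ai marks every intrinsic below as a static, always-inlined, no-debug
 * helper, so including this header never emits out-of-line symbols.
 *
 * Each intrinsic is then defined twice: the __LITTLE_ENDIAN__ variant calls
 * the builtin directly, while the big-endian variant first reverses the lane
 * order of its operands with __builtin_shufflevector, calls the builtin, and
 * reverses the result back, since the underlying builtins assume
 * little-endian lane numbering.  The extra __noswap_ helpers are the
 * unswapped bodies; other big-endian intrinsic definitions appear to call
 * them to avoid reversing lanes twice. */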
    431 
    432 #ifdef __LITTLE_ENDIAN__
    433 __ai uint8x16_t vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) {
    434   uint8x16_t __ret;
    435   __ret = (uint8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
    436   return __ret;
    437 }
    438 #else
    439 __ai uint8x16_t vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) {
    440   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    441   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    442   uint8x16_t __ret;
    443   __ret = (uint8x16_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
    444   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    445   return __ret;
    446 }
    447 __ai uint8x16_t __noswap_vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) {
    448   uint8x16_t __ret;
    449   __ret = (uint8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
    450   return __ret;
    451 }
    452 #endif
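/* Illustrative use of vabdq_u8, the element-wise absolute difference.  The
 * names below are only for the example; vdupq_n_u8 is not shown in this
 * excerpt but is part of arm_neon.h:
 *
 *   uint8x16_t a = vdupq_n_u8(10);
 *   uint8x16_t b = vdupq_n_u8(3);
 *   uint8x16_t d = vabdq_u8(a, b);   // each lane holds |10 - 3| = 7
 */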
    453 
    454 #ifdef __LITTLE_ENDIAN__
    455 __ai uint32x4_t vabdq_u32(uint32x4_t __p0, uint32x4_t __p1) {
    456   uint32x4_t __ret;
    457   __ret = (uint32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
    458   return __ret;
    459 }
    460 #else
    461 __ai uint32x4_t vabdq_u32(uint32x4_t __p0, uint32x4_t __p1) {
    462   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    463   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
    464   uint32x4_t __ret;
    465   __ret = (uint32x4_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
    466   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    467   return __ret;
    468 }
    469 __ai uint32x4_t __noswap_vabdq_u32(uint32x4_t __p0, uint32x4_t __p1) {
    470   uint32x4_t __ret;
    471   __ret = (uint32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
    472   return __ret;
    473 }
    474 #endif
    475 
    476 #ifdef __LITTLE_ENDIAN__
    477 __ai uint16x8_t vabdq_u16(uint16x8_t __p0, uint16x8_t __p1) {
    478   uint16x8_t __ret;
    479   __ret = (uint16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
    480   return __ret;
    481 }
    482 #else
    483 __ai uint16x8_t vabdq_u16(uint16x8_t __p0, uint16x8_t __p1) {
    484   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
    485   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
    486   uint16x8_t __ret;
    487   __ret = (uint16x8_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
    488   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
    489   return __ret;
    490 }
    491 __ai uint16x8_t __noswap_vabdq_u16(uint16x8_t __p0, uint16x8_t __p1) {
    492   uint16x8_t __ret;
    493   __ret = (uint16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
    494   return __ret;
    495 }
    496 #endif
    497 
    498 #ifdef __LITTLE_ENDIAN__
    499 __ai int8x16_t vabdq_s8(int8x16_t __p0, int8x16_t __p1) {
    500   int8x16_t __ret;
    501   __ret = (int8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
    502   return __ret;
    503 }
    504 #else
    505 __ai int8x16_t vabdq_s8(int8x16_t __p0, int8x16_t __p1) {
    506   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    507   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    508   int8x16_t __ret;
    509   __ret = (int8x16_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32);
    510   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    511   return __ret;
    512 }
    513 __ai int8x16_t __noswap_vabdq_s8(int8x16_t __p0, int8x16_t __p1) {
    514   int8x16_t __ret;
    515   __ret = (int8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
    516   return __ret;
    517 }
    518 #endif
    519 
    520 #ifdef __LITTLE_ENDIAN__
    521 __ai float32x4_t vabdq_f32(float32x4_t __p0, float32x4_t __p1) {
    522   float32x4_t __ret;
    523   __ret = (float32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
    524   return __ret;
    525 }
    526 #else
    527 __ai float32x4_t vabdq_f32(float32x4_t __p0, float32x4_t __p1) {
    528   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    529   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
    530   float32x4_t __ret;
    531   __ret = (float32x4_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
    532   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    533   return __ret;
    534 }
    535 #endif
    536 
    537 #ifdef __LITTLE_ENDIAN__
    538 __ai int32x4_t vabdq_s32(int32x4_t __p0, int32x4_t __p1) {
    539   int32x4_t __ret;
    540   __ret = (int32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
    541   return __ret;
    542 }
    543 #else
    544 __ai int32x4_t vabdq_s32(int32x4_t __p0, int32x4_t __p1) {
    545   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    546   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
    547   int32x4_t __ret;
    548   __ret = (int32x4_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34);
    549   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    550   return __ret;
    551 }
    552 __ai int32x4_t __noswap_vabdq_s32(int32x4_t __p0, int32x4_t __p1) {
    553   int32x4_t __ret;
    554   __ret = (int32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
    555   return __ret;
    556 }
    557 #endif
    558 
    559 #ifdef __LITTLE_ENDIAN__
    560 __ai int16x8_t vabdq_s16(int16x8_t __p0, int16x8_t __p1) {
    561   int16x8_t __ret;
    562   __ret = (int16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
    563   return __ret;
    564 }
    565 #else
    566 __ai int16x8_t vabdq_s16(int16x8_t __p0, int16x8_t __p1) {
    567   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
    568   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
    569   int16x8_t __ret;
    570   __ret = (int16x8_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
    571   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
    572   return __ret;
    573 }
    574 __ai int16x8_t __noswap_vabdq_s16(int16x8_t __p0, int16x8_t __p1) {
    575   int16x8_t __ret;
    576   __ret = (int16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
    577   return __ret;
    578 }
    579 #endif
    580 
    581 #ifdef __LITTLE_ENDIAN__
    582 __ai uint8x8_t vabd_u8(uint8x8_t __p0, uint8x8_t __p1) {
    583   uint8x8_t __ret;
    584   __ret = (uint8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 16);
    585   return __ret;
    586 }
    587 #else
    588 __ai uint8x8_t vabd_u8(uint8x8_t __p0, uint8x8_t __p1) {
    589   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
    590   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
    591   uint8x8_t __ret;
    592   __ret = (uint8x8_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16);
    593   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
    594   return __ret;
    595 }
    596 __ai uint8x8_t __noswap_vabd_u8(uint8x8_t __p0, uint8x8_t __p1) {
    597   uint8x8_t __ret;
    598   __ret = (uint8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 16);
    599   return __ret;
    600 }
    601 #endif
    602 
    603 #ifdef __LITTLE_ENDIAN__
    604 __ai uint32x2_t vabd_u32(uint32x2_t __p0, uint32x2_t __p1) {
    605   uint32x2_t __ret;
    606   __ret = (uint32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
    607   return __ret;
    608 }
    609 #else
    610 __ai uint32x2_t vabd_u32(uint32x2_t __p0, uint32x2_t __p1) {
    611   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
    612   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
    613   uint32x2_t __ret;
    614   __ret = (uint32x2_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
    615   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
    616   return __ret;
    617 }
    618 __ai uint32x2_t __noswap_vabd_u32(uint32x2_t __p0, uint32x2_t __p1) {
    619   uint32x2_t __ret;
    620   __ret = (uint32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
    621   return __ret;
    622 }
    623 #endif
    624 
    625 #ifdef __LITTLE_ENDIAN__
    626 __ai uint16x4_t vabd_u16(uint16x4_t __p0, uint16x4_t __p1) {
    627   uint16x4_t __ret;
    628   __ret = (uint16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
    629   return __ret;
    630 }
    631 #else
    632 __ai uint16x4_t vabd_u16(uint16x4_t __p0, uint16x4_t __p1) {
    633   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    634   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
    635   uint16x4_t __ret;
    636   __ret = (uint16x4_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
    637   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    638   return __ret;
    639 }
    640 __ai uint16x4_t __noswap_vabd_u16(uint16x4_t __p0, uint16x4_t __p1) {
    641   uint16x4_t __ret;
    642   __ret = (uint16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
    643   return __ret;
    644 }
    645 #endif
    646 
    647 #ifdef __LITTLE_ENDIAN__
    648 __ai int8x8_t vabd_s8(int8x8_t __p0, int8x8_t __p1) {
    649   int8x8_t __ret;
    650   __ret = (int8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 0);
    651   return __ret;
    652 }
    653 #else
    654 __ai int8x8_t vabd_s8(int8x8_t __p0, int8x8_t __p1) {
    655   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
    656   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
    657   int8x8_t __ret;
    658   __ret = (int8x8_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0);
    659   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
    660   return __ret;
    661 }
    662 __ai int8x8_t __noswap_vabd_s8(int8x8_t __p0, int8x8_t __p1) {
    663   int8x8_t __ret;
    664   __ret = (int8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 0);
    665   return __ret;
    666 }
    667 #endif
    668 
    669 #ifdef __LITTLE_ENDIAN__
    670 __ai float32x2_t vabd_f32(float32x2_t __p0, float32x2_t __p1) {
    671   float32x2_t __ret;
    672   __ret = (float32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
    673   return __ret;
    674 }
    675 #else
    676 __ai float32x2_t vabd_f32(float32x2_t __p0, float32x2_t __p1) {
    677   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
    678   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
    679   float32x2_t __ret;
    680   __ret = (float32x2_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
    681   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
    682   return __ret;
    683 }
    684 #endif
    685 
    686 #ifdef __LITTLE_ENDIAN__
    687 __ai int32x2_t vabd_s32(int32x2_t __p0, int32x2_t __p1) {
    688   int32x2_t __ret;
    689   __ret = (int32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 2);
    690   return __ret;
    691 }
    692 #else
    693 __ai int32x2_t vabd_s32(int32x2_t __p0, int32x2_t __p1) {
    694   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
    695   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
    696   int32x2_t __ret;
    697   __ret = (int32x2_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2);
    698   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
    699   return __ret;
    700 }
    701 __ai int32x2_t __noswap_vabd_s32(int32x2_t __p0, int32x2_t __p1) {
    702   int32x2_t __ret;
    703   __ret = (int32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 2);
    704   return __ret;
    705 }
    706 #endif
    707 
    708 #ifdef __LITTLE_ENDIAN__
    709 __ai int16x4_t vabd_s16(int16x4_t __p0, int16x4_t __p1) {
    710   int16x4_t __ret;
    711   __ret = (int16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 1);
    712   return __ret;
    713 }
    714 #else
    715 __ai int16x4_t vabd_s16(int16x4_t __p0, int16x4_t __p1) {
    716   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    717   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
    718   int16x4_t __ret;
    719   __ret = (int16x4_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1);
    720   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    721   return __ret;
    722 }
    723 __ai int16x4_t __noswap_vabd_s16(int16x4_t __p0, int16x4_t __p1) {
    724   int16x4_t __ret;
    725   __ret = (int16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 1);
    726   return __ret;
    727 }
    728 #endif
    729 
    730 #ifdef __LITTLE_ENDIAN__
    731 __ai int8x16_t vabsq_s8(int8x16_t __p0) {
    732   int8x16_t __ret;
    733   __ret = (int8x16_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 32);
    734   return __ret;
    735 }
    736 #else
    737 __ai int8x16_t vabsq_s8(int8x16_t __p0) {
    738   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    739   int8x16_t __ret;
    740   __ret = (int8x16_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 32);
    741   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    742   return __ret;
    743 }
    744 #endif
    745 
    746 #ifdef __LITTLE_ENDIAN__
    747 __ai float32x4_t vabsq_f32(float32x4_t __p0) {
    748   float32x4_t __ret;
    749   __ret = (float32x4_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 41);
    750   return __ret;
    751 }
    752 #else
    753 __ai float32x4_t vabsq_f32(float32x4_t __p0) {
    754   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    755   float32x4_t __ret;
    756   __ret = (float32x4_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 41);
    757   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    758   return __ret;
    759 }
    760 #endif
    761 
    762 #ifdef __LITTLE_ENDIAN__
    763 __ai int32x4_t vabsq_s32(int32x4_t __p0) {
    764   int32x4_t __ret;
    765   __ret = (int32x4_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 34);
    766   return __ret;
    767 }
    768 #else
    769 __ai int32x4_t vabsq_s32(int32x4_t __p0) {
    770   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    771   int32x4_t __ret;
    772   __ret = (int32x4_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 34);
    773   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    774   return __ret;
    775 }
    776 #endif
    777 
    778 #ifdef __LITTLE_ENDIAN__
    779 __ai int16x8_t vabsq_s16(int16x8_t __p0) {
    780   int16x8_t __ret;
    781   __ret = (int16x8_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 33);
    782   return __ret;
    783 }
    784 #else
    785 __ai int16x8_t vabsq_s16(int16x8_t __p0) {
    786   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
    787   int16x8_t __ret;
    788   __ret = (int16x8_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 33);
    789   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
    790   return __ret;
    791 }
    792 #endif
    793 
    794 #ifdef __LITTLE_ENDIAN__
    795 __ai int8x8_t vabs_s8(int8x8_t __p0) {
    796   int8x8_t __ret;
    797   __ret = (int8x8_t) __builtin_neon_vabs_v((int8x8_t)__p0, 0);
    798   return __ret;
    799 }
    800 #else
    801 __ai int8x8_t vabs_s8(int8x8_t __p0) {
    802   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
    803   int8x8_t __ret;
    804   __ret = (int8x8_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 0);
    805   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
    806   return __ret;
    807 }
    808 #endif
    809 
    810 #ifdef __LITTLE_ENDIAN__
    811 __ai float32x2_t vabs_f32(float32x2_t __p0) {
    812   float32x2_t __ret;
    813   __ret = (float32x2_t) __builtin_neon_vabs_v((int8x8_t)__p0, 9);
    814   return __ret;
    815 }
    816 #else
    817 __ai float32x2_t vabs_f32(float32x2_t __p0) {
    818   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
    819   float32x2_t __ret;
    820   __ret = (float32x2_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 9);
    821   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
    822   return __ret;
    823 }
    824 #endif
    825 
    826 #ifdef __LITTLE_ENDIAN__
    827 __ai int32x2_t vabs_s32(int32x2_t __p0) {
    828   int32x2_t __ret;
    829   __ret = (int32x2_t) __builtin_neon_vabs_v((int8x8_t)__p0, 2);
    830   return __ret;
    831 }
    832 #else
    833 __ai int32x2_t vabs_s32(int32x2_t __p0) {
    834   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
    835   int32x2_t __ret;
    836   __ret = (int32x2_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 2);
    837   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
    838   return __ret;
    839 }
    840 #endif
    841 
    842 #ifdef __LITTLE_ENDIAN__
    843 __ai int16x4_t vabs_s16(int16x4_t __p0) {
    844   int16x4_t __ret;
    845   __ret = (int16x4_t) __builtin_neon_vabs_v((int8x8_t)__p0, 1);
    846   return __ret;
    847 }
    848 #else
    849 __ai int16x4_t vabs_s16(int16x4_t __p0) {
    850   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    851   int16x4_t __ret;
    852   __ret = (int16x4_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 1);
    853   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    854   return __ret;
    855 }
    856 #endif
    857 
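/* Unlike vabd and vabs above, the vadd and vaddq intrinsics that follow need
 * no builtin: clang's NEON vector types support the ordinary C operators, so
 * the sum is written as __p0 + __p1 and only the big-endian lane reversal is
 * wrapped around it.  The bitwise vand and vbic intrinsics further down use
 * the same pattern with & and & ~. */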
    858 #ifdef __LITTLE_ENDIAN__
    859 __ai uint8x16_t vaddq_u8(uint8x16_t __p0, uint8x16_t __p1) {
    860   uint8x16_t __ret;
    861   __ret = __p0 + __p1;
    862   return __ret;
    863 }
    864 #else
    865 __ai uint8x16_t vaddq_u8(uint8x16_t __p0, uint8x16_t __p1) {
    866   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    867   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    868   uint8x16_t __ret;
    869   __ret = __rev0 + __rev1;
    870   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    871   return __ret;
    872 }
    873 #endif
    874 
    875 #ifdef __LITTLE_ENDIAN__
    876 __ai uint32x4_t vaddq_u32(uint32x4_t __p0, uint32x4_t __p1) {
    877   uint32x4_t __ret;
    878   __ret = __p0 + __p1;
    879   return __ret;
    880 }
    881 #else
    882 __ai uint32x4_t vaddq_u32(uint32x4_t __p0, uint32x4_t __p1) {
    883   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    884   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
    885   uint32x4_t __ret;
    886   __ret = __rev0 + __rev1;
    887   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    888   return __ret;
    889 }
    890 #endif
    891 
    892 #ifdef __LITTLE_ENDIAN__
    893 __ai uint64x2_t vaddq_u64(uint64x2_t __p0, uint64x2_t __p1) {
    894   uint64x2_t __ret;
    895   __ret = __p0 + __p1;
    896   return __ret;
    897 }
    898 #else
    899 __ai uint64x2_t vaddq_u64(uint64x2_t __p0, uint64x2_t __p1) {
    900   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
    901   uint64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
    902   uint64x2_t __ret;
    903   __ret = __rev0 + __rev1;
    904   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
    905   return __ret;
    906 }
    907 #endif
    908 
    909 #ifdef __LITTLE_ENDIAN__
    910 __ai uint16x8_t vaddq_u16(uint16x8_t __p0, uint16x8_t __p1) {
    911   uint16x8_t __ret;
    912   __ret = __p0 + __p1;
    913   return __ret;
    914 }
    915 #else
    916 __ai uint16x8_t vaddq_u16(uint16x8_t __p0, uint16x8_t __p1) {
    917   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
    918   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
    919   uint16x8_t __ret;
    920   __ret = __rev0 + __rev1;
    921   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
    922   return __ret;
    923 }
    924 #endif
    925 
    926 #ifdef __LITTLE_ENDIAN__
    927 __ai int8x16_t vaddq_s8(int8x16_t __p0, int8x16_t __p1) {
    928   int8x16_t __ret;
    929   __ret = __p0 + __p1;
    930   return __ret;
    931 }
    932 #else
    933 __ai int8x16_t vaddq_s8(int8x16_t __p0, int8x16_t __p1) {
    934   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    935   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    936   int8x16_t __ret;
    937   __ret = __rev0 + __rev1;
    938   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    939   return __ret;
    940 }
    941 #endif
    942 
    943 #ifdef __LITTLE_ENDIAN__
    944 __ai float32x4_t vaddq_f32(float32x4_t __p0, float32x4_t __p1) {
    945   float32x4_t __ret;
    946   __ret = __p0 + __p1;
    947   return __ret;
    948 }
    949 #else
    950 __ai float32x4_t vaddq_f32(float32x4_t __p0, float32x4_t __p1) {
    951   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    952   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
    953   float32x4_t __ret;
    954   __ret = __rev0 + __rev1;
    955   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    956   return __ret;
    957 }
    958 #endif
    959 
    960 #ifdef __LITTLE_ENDIAN__
    961 __ai int32x4_t vaddq_s32(int32x4_t __p0, int32x4_t __p1) {
    962   int32x4_t __ret;
    963   __ret = __p0 + __p1;
    964   return __ret;
    965 }
    966 #else
    967 __ai int32x4_t vaddq_s32(int32x4_t __p0, int32x4_t __p1) {
    968   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
    969   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
    970   int32x4_t __ret;
    971   __ret = __rev0 + __rev1;
    972   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
    973   return __ret;
    974 }
    975 #endif
    976 
    977 #ifdef __LITTLE_ENDIAN__
    978 __ai int64x2_t vaddq_s64(int64x2_t __p0, int64x2_t __p1) {
    979   int64x2_t __ret;
    980   __ret = __p0 + __p1;
    981   return __ret;
    982 }
    983 #else
    984 __ai int64x2_t vaddq_s64(int64x2_t __p0, int64x2_t __p1) {
    985   int64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
    986   int64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
    987   int64x2_t __ret;
    988   __ret = __rev0 + __rev1;
    989   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
    990   return __ret;
    991 }
    992 #endif
    993 
    994 #ifdef __LITTLE_ENDIAN__
    995 __ai int16x8_t vaddq_s16(int16x8_t __p0, int16x8_t __p1) {
    996   int16x8_t __ret;
    997   __ret = __p0 + __p1;
    998   return __ret;
    999 }
   1000 #else
   1001 __ai int16x8_t vaddq_s16(int16x8_t __p0, int16x8_t __p1) {
   1002   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1003   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1004   int16x8_t __ret;
   1005   __ret = __rev0 + __rev1;
   1006   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1007   return __ret;
   1008 }
   1009 #endif
   1010 
   1011 #ifdef __LITTLE_ENDIAN__
   1012 __ai uint8x8_t vadd_u8(uint8x8_t __p0, uint8x8_t __p1) {
   1013   uint8x8_t __ret;
   1014   __ret = __p0 + __p1;
   1015   return __ret;
   1016 }
   1017 #else
   1018 __ai uint8x8_t vadd_u8(uint8x8_t __p0, uint8x8_t __p1) {
   1019   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1020   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1021   uint8x8_t __ret;
   1022   __ret = __rev0 + __rev1;
   1023   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1024   return __ret;
   1025 }
   1026 #endif
   1027 
   1028 #ifdef __LITTLE_ENDIAN__
   1029 __ai uint32x2_t vadd_u32(uint32x2_t __p0, uint32x2_t __p1) {
   1030   uint32x2_t __ret;
   1031   __ret = __p0 + __p1;
   1032   return __ret;
   1033 }
   1034 #else
   1035 __ai uint32x2_t vadd_u32(uint32x2_t __p0, uint32x2_t __p1) {
   1036   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1037   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1038   uint32x2_t __ret;
   1039   __ret = __rev0 + __rev1;
   1040   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1041   return __ret;
   1042 }
   1043 #endif
   1044 
   1045 #ifdef __LITTLE_ENDIAN__
   1046 __ai uint64x1_t vadd_u64(uint64x1_t __p0, uint64x1_t __p1) {
   1047   uint64x1_t __ret;
   1048   __ret = __p0 + __p1;
   1049   return __ret;
   1050 }
   1051 #else
   1052 __ai uint64x1_t vadd_u64(uint64x1_t __p0, uint64x1_t __p1) {
   1053   uint64x1_t __ret;
   1054   __ret = __p0 + __p1;
   1055   return __ret;
   1056 }
   1057 #endif
   1058 
   1059 #ifdef __LITTLE_ENDIAN__
   1060 __ai uint16x4_t vadd_u16(uint16x4_t __p0, uint16x4_t __p1) {
   1061   uint16x4_t __ret;
   1062   __ret = __p0 + __p1;
   1063   return __ret;
   1064 }
   1065 #else
   1066 __ai uint16x4_t vadd_u16(uint16x4_t __p0, uint16x4_t __p1) {
   1067   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1068   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1069   uint16x4_t __ret;
   1070   __ret = __rev0 + __rev1;
   1071   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1072   return __ret;
   1073 }
   1074 #endif
   1075 
   1076 #ifdef __LITTLE_ENDIAN__
   1077 __ai int8x8_t vadd_s8(int8x8_t __p0, int8x8_t __p1) {
   1078   int8x8_t __ret;
   1079   __ret = __p0 + __p1;
   1080   return __ret;
   1081 }
   1082 #else
   1083 __ai int8x8_t vadd_s8(int8x8_t __p0, int8x8_t __p1) {
   1084   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1085   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1086   int8x8_t __ret;
   1087   __ret = __rev0 + __rev1;
   1088   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1089   return __ret;
   1090 }
   1091 #endif
   1092 
   1093 #ifdef __LITTLE_ENDIAN__
   1094 __ai float32x2_t vadd_f32(float32x2_t __p0, float32x2_t __p1) {
   1095   float32x2_t __ret;
   1096   __ret = __p0 + __p1;
   1097   return __ret;
   1098 }
   1099 #else
   1100 __ai float32x2_t vadd_f32(float32x2_t __p0, float32x2_t __p1) {
   1101   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1102   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1103   float32x2_t __ret;
   1104   __ret = __rev0 + __rev1;
   1105   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1106   return __ret;
   1107 }
   1108 #endif
   1109 
   1110 #ifdef __LITTLE_ENDIAN__
   1111 __ai int32x2_t vadd_s32(int32x2_t __p0, int32x2_t __p1) {
   1112   int32x2_t __ret;
   1113   __ret = __p0 + __p1;
   1114   return __ret;
   1115 }
   1116 #else
   1117 __ai int32x2_t vadd_s32(int32x2_t __p0, int32x2_t __p1) {
   1118   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1119   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1120   int32x2_t __ret;
   1121   __ret = __rev0 + __rev1;
   1122   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1123   return __ret;
   1124 }
   1125 #endif
   1126 
   1127 #ifdef __LITTLE_ENDIAN__
   1128 __ai int64x1_t vadd_s64(int64x1_t __p0, int64x1_t __p1) {
   1129   int64x1_t __ret;
   1130   __ret = __p0 + __p1;
   1131   return __ret;
   1132 }
   1133 #else
   1134 __ai int64x1_t vadd_s64(int64x1_t __p0, int64x1_t __p1) {
   1135   int64x1_t __ret;
   1136   __ret = __p0 + __p1;
   1137   return __ret;
   1138 }
   1139 #endif
   1140 
   1141 #ifdef __LITTLE_ENDIAN__
   1142 __ai int16x4_t vadd_s16(int16x4_t __p0, int16x4_t __p1) {
   1143   int16x4_t __ret;
   1144   __ret = __p0 + __p1;
   1145   return __ret;
   1146 }
   1147 #else
   1148 __ai int16x4_t vadd_s16(int16x4_t __p0, int16x4_t __p1) {
   1149   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1150   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1151   int16x4_t __ret;
   1152   __ret = __rev0 + __rev1;
   1153   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1154   return __ret;
   1155 }
   1156 #endif
   1157 
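/* vaddhn ("add and narrow, returning high half") adds two wide vectors and
 * keeps only the upper half of each element of the sum; for vaddhn_u32 each
 * lane is (uint16_t)((__p0 + __p1) >> 16).  A sketch, using vdupq_n_u32
 * which is not shown in this excerpt:
 *
 *   uint32x4_t x = vdupq_n_u32(0x00030000);
 *   uint32x4_t y = vdupq_n_u32(0x00020000);
 *   uint16x4_t h = vaddhn_u32(x, y);   // each lane holds 0x0005
 */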
   1158 #ifdef __LITTLE_ENDIAN__
   1159 __ai uint16x4_t vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) {
   1160   uint16x4_t __ret;
   1161   __ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17);
   1162   return __ret;
   1163 }
   1164 #else
   1165 __ai uint16x4_t vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) {
   1166   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1167   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1168   uint16x4_t __ret;
   1169   __ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17);
   1170   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1171   return __ret;
   1172 }
   1173 __ai uint16x4_t __noswap_vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) {
   1174   uint16x4_t __ret;
   1175   __ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17);
   1176   return __ret;
   1177 }
   1178 #endif
   1179 
   1180 #ifdef __LITTLE_ENDIAN__
   1181 __ai uint32x2_t vaddhn_u64(uint64x2_t __p0, uint64x2_t __p1) {
   1182   uint32x2_t __ret;
   1183   __ret = (uint32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18);
   1184   return __ret;
   1185 }
   1186 #else
   1187 __ai uint32x2_t vaddhn_u64(uint64x2_t __p0, uint64x2_t __p1) {
   1188   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1189   uint64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1190   uint32x2_t __ret;
   1191   __ret = (uint32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18);
   1192   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1193   return __ret;
   1194 }
   1195 __ai uint32x2_t __noswap_vaddhn_u64(uint64x2_t __p0, uint64x2_t __p1) {
   1196   uint32x2_t __ret;
   1197   __ret = (uint32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18);
   1198   return __ret;
   1199 }
   1200 #endif
   1201 
   1202 #ifdef __LITTLE_ENDIAN__
   1203 __ai uint8x8_t vaddhn_u16(uint16x8_t __p0, uint16x8_t __p1) {
   1204   uint8x8_t __ret;
   1205   __ret = (uint8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16);
   1206   return __ret;
   1207 }
   1208 #else
   1209 __ai uint8x8_t vaddhn_u16(uint16x8_t __p0, uint16x8_t __p1) {
   1210   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1211   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1212   uint8x8_t __ret;
   1213   __ret = (uint8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16);
   1214   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1215   return __ret;
   1216 }
   1217 __ai uint8x8_t __noswap_vaddhn_u16(uint16x8_t __p0, uint16x8_t __p1) {
   1218   uint8x8_t __ret;
   1219   __ret = (uint8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16);
   1220   return __ret;
   1221 }
   1222 #endif
   1223 
   1224 #ifdef __LITTLE_ENDIAN__
   1225 __ai int16x4_t vaddhn_s32(int32x4_t __p0, int32x4_t __p1) {
   1226   int16x4_t __ret;
   1227   __ret = (int16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1);
   1228   return __ret;
   1229 }
   1230 #else
   1231 __ai int16x4_t vaddhn_s32(int32x4_t __p0, int32x4_t __p1) {
   1232   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1233   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1234   int16x4_t __ret;
   1235   __ret = (int16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1);
   1236   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1237   return __ret;
   1238 }
   1239 __ai int16x4_t __noswap_vaddhn_s32(int32x4_t __p0, int32x4_t __p1) {
   1240   int16x4_t __ret;
   1241   __ret = (int16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1);
   1242   return __ret;
   1243 }
   1244 #endif
   1245 
   1246 #ifdef __LITTLE_ENDIAN__
   1247 __ai int32x2_t vaddhn_s64(int64x2_t __p0, int64x2_t __p1) {
   1248   int32x2_t __ret;
   1249   __ret = (int32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2);
   1250   return __ret;
   1251 }
   1252 #else
   1253 __ai int32x2_t vaddhn_s64(int64x2_t __p0, int64x2_t __p1) {
   1254   int64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1255   int64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1256   int32x2_t __ret;
   1257   __ret = (int32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2);
   1258   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1259   return __ret;
   1260 }
   1261 __ai int32x2_t __noswap_vaddhn_s64(int64x2_t __p0, int64x2_t __p1) {
   1262   int32x2_t __ret;
   1263   __ret = (int32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2);
   1264   return __ret;
   1265 }
   1266 #endif
   1267 
   1268 #ifdef __LITTLE_ENDIAN__
   1269 __ai int8x8_t vaddhn_s16(int16x8_t __p0, int16x8_t __p1) {
   1270   int8x8_t __ret;
   1271   __ret = (int8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0);
   1272   return __ret;
   1273 }
   1274 #else
   1275 __ai int8x8_t vaddhn_s16(int16x8_t __p0, int16x8_t __p1) {
   1276   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1277   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1278   int8x8_t __ret;
   1279   __ret = (int8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0);
   1280   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1281   return __ret;
   1282 }
   1283 __ai int8x8_t __noswap_vaddhn_s16(int16x8_t __p0, int16x8_t __p1) {
   1284   int8x8_t __ret;
   1285   __ret = (int8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0);
   1286   return __ret;
   1287 }
   1288 #endif
   1289 
   1290 #ifdef __LITTLE_ENDIAN__
   1291 __ai uint8x16_t vandq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   1292   uint8x16_t __ret;
   1293   __ret = __p0 & __p1;
   1294   return __ret;
   1295 }
   1296 #else
   1297 __ai uint8x16_t vandq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   1298   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1299   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1300   uint8x16_t __ret;
   1301   __ret = __rev0 & __rev1;
   1302   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1303   return __ret;
   1304 }
   1305 #endif
   1306 
   1307 #ifdef __LITTLE_ENDIAN__
   1308 __ai uint32x4_t vandq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   1309   uint32x4_t __ret;
   1310   __ret = __p0 & __p1;
   1311   return __ret;
   1312 }
   1313 #else
   1314 __ai uint32x4_t vandq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   1315   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1316   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1317   uint32x4_t __ret;
   1318   __ret = __rev0 & __rev1;
   1319   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1320   return __ret;
   1321 }
   1322 #endif
   1323 
   1324 #ifdef __LITTLE_ENDIAN__
   1325 __ai uint64x2_t vandq_u64(uint64x2_t __p0, uint64x2_t __p1) {
   1326   uint64x2_t __ret;
   1327   __ret = __p0 & __p1;
   1328   return __ret;
   1329 }
   1330 #else
   1331 __ai uint64x2_t vandq_u64(uint64x2_t __p0, uint64x2_t __p1) {
   1332   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1333   uint64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1334   uint64x2_t __ret;
   1335   __ret = __rev0 & __rev1;
   1336   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1337   return __ret;
   1338 }
   1339 #endif
   1340 
   1341 #ifdef __LITTLE_ENDIAN__
   1342 __ai uint16x8_t vandq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   1343   uint16x8_t __ret;
   1344   __ret = __p0 & __p1;
   1345   return __ret;
   1346 }
   1347 #else
   1348 __ai uint16x8_t vandq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   1349   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1350   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1351   uint16x8_t __ret;
   1352   __ret = __rev0 & __rev1;
   1353   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1354   return __ret;
   1355 }
   1356 #endif
   1357 
   1358 #ifdef __LITTLE_ENDIAN__
   1359 __ai int8x16_t vandq_s8(int8x16_t __p0, int8x16_t __p1) {
   1360   int8x16_t __ret;
   1361   __ret = __p0 & __p1;
   1362   return __ret;
   1363 }
   1364 #else
   1365 __ai int8x16_t vandq_s8(int8x16_t __p0, int8x16_t __p1) {
   1366   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1367   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1368   int8x16_t __ret;
   1369   __ret = __rev0 & __rev1;
   1370   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1371   return __ret;
   1372 }
   1373 #endif
   1374 
   1375 #ifdef __LITTLE_ENDIAN__
   1376 __ai int32x4_t vandq_s32(int32x4_t __p0, int32x4_t __p1) {
   1377   int32x4_t __ret;
   1378   __ret = __p0 & __p1;
   1379   return __ret;
   1380 }
   1381 #else
   1382 __ai int32x4_t vandq_s32(int32x4_t __p0, int32x4_t __p1) {
   1383   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1384   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1385   int32x4_t __ret;
   1386   __ret = __rev0 & __rev1;
   1387   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1388   return __ret;
   1389 }
   1390 #endif
   1391 
   1392 #ifdef __LITTLE_ENDIAN__
   1393 __ai int64x2_t vandq_s64(int64x2_t __p0, int64x2_t __p1) {
   1394   int64x2_t __ret;
   1395   __ret = __p0 & __p1;
   1396   return __ret;
   1397 }
   1398 #else
   1399 __ai int64x2_t vandq_s64(int64x2_t __p0, int64x2_t __p1) {
   1400   int64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1401   int64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1402   int64x2_t __ret;
   1403   __ret = __rev0 & __rev1;
   1404   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1405   return __ret;
   1406 }
   1407 #endif
   1408 
   1409 #ifdef __LITTLE_ENDIAN__
   1410 __ai int16x8_t vandq_s16(int16x8_t __p0, int16x8_t __p1) {
   1411   int16x8_t __ret;
   1412   __ret = __p0 & __p1;
   1413   return __ret;
   1414 }
   1415 #else
   1416 __ai int16x8_t vandq_s16(int16x8_t __p0, int16x8_t __p1) {
   1417   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1418   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1419   int16x8_t __ret;
   1420   __ret = __rev0 & __rev1;
   1421   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1422   return __ret;
   1423 }
   1424 #endif
   1425 
   1426 #ifdef __LITTLE_ENDIAN__
   1427 __ai uint8x8_t vand_u8(uint8x8_t __p0, uint8x8_t __p1) {
   1428   uint8x8_t __ret;
   1429   __ret = __p0 & __p1;
   1430   return __ret;
   1431 }
   1432 #else
   1433 __ai uint8x8_t vand_u8(uint8x8_t __p0, uint8x8_t __p1) {
   1434   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1435   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1436   uint8x8_t __ret;
   1437   __ret = __rev0 & __rev1;
   1438   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1439   return __ret;
   1440 }
   1441 #endif
   1442 
   1443 #ifdef __LITTLE_ENDIAN__
   1444 __ai uint32x2_t vand_u32(uint32x2_t __p0, uint32x2_t __p1) {
   1445   uint32x2_t __ret;
   1446   __ret = __p0 & __p1;
   1447   return __ret;
   1448 }
   1449 #else
   1450 __ai uint32x2_t vand_u32(uint32x2_t __p0, uint32x2_t __p1) {
   1451   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1452   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1453   uint32x2_t __ret;
   1454   __ret = __rev0 & __rev1;
   1455   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1456   return __ret;
   1457 }
   1458 #endif
   1459 
   1460 #ifdef __LITTLE_ENDIAN__
   1461 __ai uint64x1_t vand_u64(uint64x1_t __p0, uint64x1_t __p1) {
   1462   uint64x1_t __ret;
   1463   __ret = __p0 & __p1;
   1464   return __ret;
   1465 }
   1466 #else
   1467 __ai uint64x1_t vand_u64(uint64x1_t __p0, uint64x1_t __p1) {
   1468   uint64x1_t __ret;
   1469   __ret = __p0 & __p1;
   1470   return __ret;
   1471 }
   1472 #endif
   1473 
   1474 #ifdef __LITTLE_ENDIAN__
   1475 __ai uint16x4_t vand_u16(uint16x4_t __p0, uint16x4_t __p1) {
   1476   uint16x4_t __ret;
   1477   __ret = __p0 & __p1;
   1478   return __ret;
   1479 }
   1480 #else
   1481 __ai uint16x4_t vand_u16(uint16x4_t __p0, uint16x4_t __p1) {
   1482   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1483   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1484   uint16x4_t __ret;
   1485   __ret = __rev0 & __rev1;
   1486   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1487   return __ret;
   1488 }
   1489 #endif
   1490 
   1491 #ifdef __LITTLE_ENDIAN__
   1492 __ai int8x8_t vand_s8(int8x8_t __p0, int8x8_t __p1) {
   1493   int8x8_t __ret;
   1494   __ret = __p0 & __p1;
   1495   return __ret;
   1496 }
   1497 #else
   1498 __ai int8x8_t vand_s8(int8x8_t __p0, int8x8_t __p1) {
   1499   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1500   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1501   int8x8_t __ret;
   1502   __ret = __rev0 & __rev1;
   1503   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1504   return __ret;
   1505 }
   1506 #endif
   1507 
   1508 #ifdef __LITTLE_ENDIAN__
   1509 __ai int32x2_t vand_s32(int32x2_t __p0, int32x2_t __p1) {
   1510   int32x2_t __ret;
   1511   __ret = __p0 & __p1;
   1512   return __ret;
   1513 }
   1514 #else
   1515 __ai int32x2_t vand_s32(int32x2_t __p0, int32x2_t __p1) {
   1516   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1517   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1518   int32x2_t __ret;
   1519   __ret = __rev0 & __rev1;
   1520   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1521   return __ret;
   1522 }
   1523 #endif
   1524 
   1525 #ifdef __LITTLE_ENDIAN__
   1526 __ai int64x1_t vand_s64(int64x1_t __p0, int64x1_t __p1) {
   1527   int64x1_t __ret;
   1528   __ret = __p0 & __p1;
   1529   return __ret;
   1530 }
   1531 #else
   1532 __ai int64x1_t vand_s64(int64x1_t __p0, int64x1_t __p1) {
   1533   int64x1_t __ret;
   1534   __ret = __p0 & __p1;
   1535   return __ret;
   1536 }
   1537 #endif
   1538 
   1539 #ifdef __LITTLE_ENDIAN__
   1540 __ai int16x4_t vand_s16(int16x4_t __p0, int16x4_t __p1) {
   1541   int16x4_t __ret;
   1542   __ret = __p0 & __p1;
   1543   return __ret;
   1544 }
   1545 #else
   1546 __ai int16x4_t vand_s16(int16x4_t __p0, int16x4_t __p1) {
   1547   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1548   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1549   int16x4_t __ret;
   1550   __ret = __rev0 & __rev1;
   1551   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1552   return __ret;
   1553 }
   1554 #endif
   1555 
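/* vbic/vbicq: Vector Bitwise Clear. Each lane computes __p0 & ~__p1. As with the
 * other intrinsics in this header, the big-endian (#else) variants reverse the
 * lanes before and after the operation so the builtins always see little-endian
 * lane ordering; one-lane 64-bit vectors need no reversal, so both variants match. */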
   1556 #ifdef __LITTLE_ENDIAN__
   1557 __ai uint8x16_t vbicq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   1558   uint8x16_t __ret;
   1559   __ret = __p0 & ~__p1;
   1560   return __ret;
   1561 }
   1562 #else
   1563 __ai uint8x16_t vbicq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   1564   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1565   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1566   uint8x16_t __ret;
   1567   __ret = __rev0 & ~__rev1;
   1568   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1569   return __ret;
   1570 }
   1571 #endif
   1572 
   1573 #ifdef __LITTLE_ENDIAN__
   1574 __ai uint32x4_t vbicq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   1575   uint32x4_t __ret;
   1576   __ret = __p0 & ~__p1;
   1577   return __ret;
   1578 }
   1579 #else
   1580 __ai uint32x4_t vbicq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   1581   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1582   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1583   uint32x4_t __ret;
   1584   __ret = __rev0 & ~__rev1;
   1585   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1586   return __ret;
   1587 }
   1588 #endif
   1589 
   1590 #ifdef __LITTLE_ENDIAN__
   1591 __ai uint64x2_t vbicq_u64(uint64x2_t __p0, uint64x2_t __p1) {
   1592   uint64x2_t __ret;
   1593   __ret = __p0 & ~__p1;
   1594   return __ret;
   1595 }
   1596 #else
   1597 __ai uint64x2_t vbicq_u64(uint64x2_t __p0, uint64x2_t __p1) {
   1598   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1599   uint64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1600   uint64x2_t __ret;
   1601   __ret = __rev0 & ~__rev1;
   1602   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1603   return __ret;
   1604 }
   1605 #endif
   1606 
   1607 #ifdef __LITTLE_ENDIAN__
   1608 __ai uint16x8_t vbicq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   1609   uint16x8_t __ret;
   1610   __ret = __p0 & ~__p1;
   1611   return __ret;
   1612 }
   1613 #else
   1614 __ai uint16x8_t vbicq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   1615   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1616   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1617   uint16x8_t __ret;
   1618   __ret = __rev0 & ~__rev1;
   1619   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1620   return __ret;
   1621 }
   1622 #endif
   1623 
   1624 #ifdef __LITTLE_ENDIAN__
   1625 __ai int8x16_t vbicq_s8(int8x16_t __p0, int8x16_t __p1) {
   1626   int8x16_t __ret;
   1627   __ret = __p0 & ~__p1;
   1628   return __ret;
   1629 }
   1630 #else
   1631 __ai int8x16_t vbicq_s8(int8x16_t __p0, int8x16_t __p1) {
   1632   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1633   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1634   int8x16_t __ret;
   1635   __ret = __rev0 & ~__rev1;
   1636   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1637   return __ret;
   1638 }
   1639 #endif
   1640 
   1641 #ifdef __LITTLE_ENDIAN__
   1642 __ai int32x4_t vbicq_s32(int32x4_t __p0, int32x4_t __p1) {
   1643   int32x4_t __ret;
   1644   __ret = __p0 & ~__p1;
   1645   return __ret;
   1646 }
   1647 #else
   1648 __ai int32x4_t vbicq_s32(int32x4_t __p0, int32x4_t __p1) {
   1649   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1650   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1651   int32x4_t __ret;
   1652   __ret = __rev0 & ~__rev1;
   1653   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1654   return __ret;
   1655 }
   1656 #endif
   1657 
   1658 #ifdef __LITTLE_ENDIAN__
   1659 __ai int64x2_t vbicq_s64(int64x2_t __p0, int64x2_t __p1) {
   1660   int64x2_t __ret;
   1661   __ret = __p0 & ~__p1;
   1662   return __ret;
   1663 }
   1664 #else
   1665 __ai int64x2_t vbicq_s64(int64x2_t __p0, int64x2_t __p1) {
   1666   int64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1667   int64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1668   int64x2_t __ret;
   1669   __ret = __rev0 & ~__rev1;
   1670   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1671   return __ret;
   1672 }
   1673 #endif
   1674 
   1675 #ifdef __LITTLE_ENDIAN__
   1676 __ai int16x8_t vbicq_s16(int16x8_t __p0, int16x8_t __p1) {
   1677   int16x8_t __ret;
   1678   __ret = __p0 & ~__p1;
   1679   return __ret;
   1680 }
   1681 #else
   1682 __ai int16x8_t vbicq_s16(int16x8_t __p0, int16x8_t __p1) {
   1683   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1684   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1685   int16x8_t __ret;
   1686   __ret = __rev0 & ~__rev1;
   1687   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1688   return __ret;
   1689 }
   1690 #endif
   1691 
   1692 #ifdef __LITTLE_ENDIAN__
   1693 __ai uint8x8_t vbic_u8(uint8x8_t __p0, uint8x8_t __p1) {
   1694   uint8x8_t __ret;
   1695   __ret = __p0 & ~__p1;
   1696   return __ret;
   1697 }
   1698 #else
   1699 __ai uint8x8_t vbic_u8(uint8x8_t __p0, uint8x8_t __p1) {
   1700   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1701   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1702   uint8x8_t __ret;
   1703   __ret = __rev0 & ~__rev1;
   1704   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1705   return __ret;
   1706 }
   1707 #endif
   1708 
   1709 #ifdef __LITTLE_ENDIAN__
   1710 __ai uint32x2_t vbic_u32(uint32x2_t __p0, uint32x2_t __p1) {
   1711   uint32x2_t __ret;
   1712   __ret = __p0 & ~__p1;
   1713   return __ret;
   1714 }
   1715 #else
   1716 __ai uint32x2_t vbic_u32(uint32x2_t __p0, uint32x2_t __p1) {
   1717   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1718   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1719   uint32x2_t __ret;
   1720   __ret = __rev0 & ~__rev1;
   1721   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1722   return __ret;
   1723 }
   1724 #endif
   1725 
   1726 #ifdef __LITTLE_ENDIAN__
   1727 __ai uint64x1_t vbic_u64(uint64x1_t __p0, uint64x1_t __p1) {
   1728   uint64x1_t __ret;
   1729   __ret = __p0 & ~__p1;
   1730   return __ret;
   1731 }
   1732 #else
   1733 __ai uint64x1_t vbic_u64(uint64x1_t __p0, uint64x1_t __p1) {
   1734   uint64x1_t __ret;
   1735   __ret = __p0 & ~__p1;
   1736   return __ret;
   1737 }
   1738 #endif
   1739 
   1740 #ifdef __LITTLE_ENDIAN__
   1741 __ai uint16x4_t vbic_u16(uint16x4_t __p0, uint16x4_t __p1) {
   1742   uint16x4_t __ret;
   1743   __ret = __p0 & ~__p1;
   1744   return __ret;
   1745 }
   1746 #else
   1747 __ai uint16x4_t vbic_u16(uint16x4_t __p0, uint16x4_t __p1) {
   1748   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1749   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1750   uint16x4_t __ret;
   1751   __ret = __rev0 & ~__rev1;
   1752   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1753   return __ret;
   1754 }
   1755 #endif
   1756 
   1757 #ifdef __LITTLE_ENDIAN__
   1758 __ai int8x8_t vbic_s8(int8x8_t __p0, int8x8_t __p1) {
   1759   int8x8_t __ret;
   1760   __ret = __p0 & ~__p1;
   1761   return __ret;
   1762 }
   1763 #else
   1764 __ai int8x8_t vbic_s8(int8x8_t __p0, int8x8_t __p1) {
   1765   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1766   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1767   int8x8_t __ret;
   1768   __ret = __rev0 & ~__rev1;
   1769   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1770   return __ret;
   1771 }
   1772 #endif
   1773 
   1774 #ifdef __LITTLE_ENDIAN__
   1775 __ai int32x2_t vbic_s32(int32x2_t __p0, int32x2_t __p1) {
   1776   int32x2_t __ret;
   1777   __ret = __p0 & ~__p1;
   1778   return __ret;
   1779 }
   1780 #else
   1781 __ai int32x2_t vbic_s32(int32x2_t __p0, int32x2_t __p1) {
   1782   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1783   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1784   int32x2_t __ret;
   1785   __ret = __rev0 & ~__rev1;
   1786   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1787   return __ret;
   1788 }
   1789 #endif
   1790 
   1791 #ifdef __LITTLE_ENDIAN__
   1792 __ai int64x1_t vbic_s64(int64x1_t __p0, int64x1_t __p1) {
   1793   int64x1_t __ret;
   1794   __ret = __p0 & ~__p1;
   1795   return __ret;
   1796 }
   1797 #else
   1798 __ai int64x1_t vbic_s64(int64x1_t __p0, int64x1_t __p1) {
   1799   int64x1_t __ret;
   1800   __ret = __p0 & ~__p1;
   1801   return __ret;
   1802 }
   1803 #endif
   1804 
   1805 #ifdef __LITTLE_ENDIAN__
   1806 __ai int16x4_t vbic_s16(int16x4_t __p0, int16x4_t __p1) {
   1807   int16x4_t __ret;
   1808   __ret = __p0 & ~__p1;
   1809   return __ret;
   1810 }
   1811 #else
   1812 __ai int16x4_t vbic_s16(int16x4_t __p0, int16x4_t __p1) {
   1813   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1814   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1815   int16x4_t __ret;
   1816   __ret = __rev0 & ~__rev1;
   1817   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1818   return __ret;
   1819 }
   1820 #endif
   1821 
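/* vbsl/vbslq: Vector Bitwise Select. For each bit position, the result takes the
 * bit from __p1 where the mask __p0 holds a 1 and from __p2 where it holds a 0. */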
   1822 #ifdef __LITTLE_ENDIAN__
   1823 __ai poly8x8_t vbsl_p8(uint8x8_t __p0, poly8x8_t __p1, poly8x8_t __p2) {
   1824   poly8x8_t __ret;
   1825   __ret = (poly8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 4);
   1826   return __ret;
   1827 }
   1828 #else
   1829 __ai poly8x8_t vbsl_p8(uint8x8_t __p0, poly8x8_t __p1, poly8x8_t __p2) {
   1830   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1831   poly8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1832   poly8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
   1833   poly8x8_t __ret;
   1834   __ret = (poly8x8_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 4);
   1835   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1836   return __ret;
   1837 }
   1838 #endif
   1839 
   1840 #ifdef __LITTLE_ENDIAN__
   1841 __ai poly16x4_t vbsl_p16(uint16x4_t __p0, poly16x4_t __p1, poly16x4_t __p2) {
   1842   poly16x4_t __ret;
   1843   __ret = (poly16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 5);
   1844   return __ret;
   1845 }
   1846 #else
   1847 __ai poly16x4_t vbsl_p16(uint16x4_t __p0, poly16x4_t __p1, poly16x4_t __p2) {
   1848   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1849   poly16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1850   poly16x4_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
   1851   poly16x4_t __ret;
   1852   __ret = (poly16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 5);
   1853   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1854   return __ret;
   1855 }
   1856 #endif
   1857 
   1858 #ifdef __LITTLE_ENDIAN__
   1859 __ai poly8x16_t vbslq_p8(uint8x16_t __p0, poly8x16_t __p1, poly8x16_t __p2) {
   1860   poly8x16_t __ret;
   1861   __ret = (poly8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 36);
   1862   return __ret;
   1863 }
   1864 #else
   1865 __ai poly8x16_t vbslq_p8(uint8x16_t __p0, poly8x16_t __p1, poly8x16_t __p2) {
   1866   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1867   poly8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1868   poly8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1869   poly8x16_t __ret;
   1870   __ret = (poly8x16_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 36);
   1871   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1872   return __ret;
   1873 }
   1874 #endif
   1875 
   1876 #ifdef __LITTLE_ENDIAN__
   1877 __ai poly16x8_t vbslq_p16(uint16x8_t __p0, poly16x8_t __p1, poly16x8_t __p2) {
   1878   poly16x8_t __ret;
   1879   __ret = (poly16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 37);
   1880   return __ret;
   1881 }
   1882 #else
   1883 __ai poly16x8_t vbslq_p16(uint16x8_t __p0, poly16x8_t __p1, poly16x8_t __p2) {
   1884   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1885   poly16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1886   poly16x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
   1887   poly16x8_t __ret;
   1888   __ret = (poly16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 37);
   1889   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1890   return __ret;
   1891 }
   1892 #endif
   1893 
   1894 #ifdef __LITTLE_ENDIAN__
   1895 __ai uint8x16_t vbslq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
   1896   uint8x16_t __ret;
   1897   __ret = (uint8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48);
   1898   return __ret;
   1899 }
   1900 #else
   1901 __ai uint8x16_t vbslq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
   1902   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1903   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1904   uint8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1905   uint8x16_t __ret;
   1906   __ret = (uint8x16_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48);
   1907   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1908   return __ret;
   1909 }
   1910 #endif
   1911 
   1912 #ifdef __LITTLE_ENDIAN__
   1913 __ai uint32x4_t vbslq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
   1914   uint32x4_t __ret;
   1915   __ret = (uint32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
   1916   return __ret;
   1917 }
   1918 #else
   1919 __ai uint32x4_t vbslq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
   1920   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1921   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1922   uint32x4_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
   1923   uint32x4_t __ret;
   1924   __ret = (uint32x4_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
   1925   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1926   return __ret;
   1927 }
   1928 #endif
   1929 
   1930 #ifdef __LITTLE_ENDIAN__
   1931 __ai uint64x2_t vbslq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
   1932   uint64x2_t __ret;
   1933   __ret = (uint64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51);
   1934   return __ret;
   1935 }
   1936 #else
   1937 __ai uint64x2_t vbslq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
   1938   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   1939   uint64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   1940   uint64x2_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
   1941   uint64x2_t __ret;
   1942   __ret = (uint64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51);
   1943   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   1944   return __ret;
   1945 }
   1946 #endif
   1947 
   1948 #ifdef __LITTLE_ENDIAN__
   1949 __ai uint16x8_t vbslq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
   1950   uint16x8_t __ret;
   1951   __ret = (uint16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 49);
   1952   return __ret;
   1953 }
   1954 #else
   1955 __ai uint16x8_t vbslq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
   1956   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   1957   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   1958   uint16x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
   1959   uint16x8_t __ret;
   1960   __ret = (uint16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 49);
   1961   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   1962   return __ret;
   1963 }
   1964 #endif
   1965 
   1966 #ifdef __LITTLE_ENDIAN__
   1967 __ai int8x16_t vbslq_s8(uint8x16_t __p0, int8x16_t __p1, int8x16_t __p2) {
   1968   int8x16_t __ret;
   1969   __ret = (int8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32);
   1970   return __ret;
   1971 }
   1972 #else
   1973 __ai int8x16_t vbslq_s8(uint8x16_t __p0, int8x16_t __p1, int8x16_t __p2) {
   1974   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1975   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1976   int8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1977   int8x16_t __ret;
   1978   __ret = (int8x16_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32);
   1979   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   1980   return __ret;
   1981 }
   1982 #endif
   1983 
   1984 #ifdef __LITTLE_ENDIAN__
   1985 __ai float32x4_t vbslq_f32(uint32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
   1986   float32x4_t __ret;
   1987   __ret = (float32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41);
   1988   return __ret;
   1989 }
   1990 #else
   1991 __ai float32x4_t vbslq_f32(uint32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
   1992   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   1993   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   1994   float32x4_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
   1995   float32x4_t __ret;
   1996   __ret = (float32x4_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41);
   1997   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   1998   return __ret;
   1999 }
   2000 #endif
   2001 
   2002 #ifdef __LITTLE_ENDIAN__
   2003 __ai int32x4_t vbslq_s32(uint32x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
   2004   int32x4_t __ret;
   2005   __ret = (int32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34);
   2006   return __ret;
   2007 }
   2008 #else
   2009 __ai int32x4_t vbslq_s32(uint32x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
   2010   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2011   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2012   int32x4_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
   2013   int32x4_t __ret;
   2014   __ret = (int32x4_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34);
   2015   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2016   return __ret;
   2017 }
   2018 #endif
   2019 
   2020 #ifdef __LITTLE_ENDIAN__
   2021 __ai int64x2_t vbslq_s64(uint64x2_t __p0, int64x2_t __p1, int64x2_t __p2) {
   2022   int64x2_t __ret;
   2023   __ret = (int64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 35);
   2024   return __ret;
   2025 }
   2026 #else
   2027 __ai int64x2_t vbslq_s64(uint64x2_t __p0, int64x2_t __p1, int64x2_t __p2) {
   2028   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2029   int64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2030   int64x2_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
   2031   int64x2_t __ret;
   2032   __ret = (int64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 35);
   2033   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2034   return __ret;
   2035 }
   2036 #endif
   2037 
   2038 #ifdef __LITTLE_ENDIAN__
   2039 __ai int16x8_t vbslq_s16(uint16x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
   2040   int16x8_t __ret;
   2041   __ret = (int16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33);
   2042   return __ret;
   2043 }
   2044 #else
   2045 __ai int16x8_t vbslq_s16(uint16x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
   2046   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2047   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2048   int16x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
   2049   int16x8_t __ret;
   2050   __ret = (int16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33);
   2051   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2052   return __ret;
   2053 }
   2054 #endif
   2055 
   2056 #ifdef __LITTLE_ENDIAN__
   2057 __ai uint8x8_t vbsl_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) {
   2058   uint8x8_t __ret;
   2059   __ret = (uint8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 16);
   2060   return __ret;
   2061 }
   2062 #else
   2063 __ai uint8x8_t vbsl_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) {
   2064   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2065   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2066   uint8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
   2067   uint8x8_t __ret;
   2068   __ret = (uint8x8_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 16);
   2069   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2070   return __ret;
   2071 }
   2072 #endif
   2073 
   2074 #ifdef __LITTLE_ENDIAN__
   2075 __ai uint32x2_t vbsl_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) {
   2076   uint32x2_t __ret;
   2077   __ret = (uint32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 18);
   2078   return __ret;
   2079 }
   2080 #else
   2081 __ai uint32x2_t vbsl_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) {
   2082   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2083   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2084   uint32x2_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
   2085   uint32x2_t __ret;
   2086   __ret = (uint32x2_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 18);
   2087   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2088   return __ret;
   2089 }
   2090 #endif
   2091 
   2092 #ifdef __LITTLE_ENDIAN__
   2093 __ai uint64x1_t vbsl_u64(uint64x1_t __p0, uint64x1_t __p1, uint64x1_t __p2) {
   2094   uint64x1_t __ret;
   2095   __ret = (uint64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 19);
   2096   return __ret;
   2097 }
   2098 #else
   2099 __ai uint64x1_t vbsl_u64(uint64x1_t __p0, uint64x1_t __p1, uint64x1_t __p2) {
   2100   uint64x1_t __ret;
   2101   __ret = (uint64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 19);
   2102   return __ret;
   2103 }
   2104 #endif
   2105 
   2106 #ifdef __LITTLE_ENDIAN__
   2107 __ai uint16x4_t vbsl_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) {
   2108   uint16x4_t __ret;
   2109   __ret = (uint16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 17);
   2110   return __ret;
   2111 }
   2112 #else
   2113 __ai uint16x4_t vbsl_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) {
   2114   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2115   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2116   uint16x4_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
   2117   uint16x4_t __ret;
   2118   __ret = (uint16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 17);
   2119   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2120   return __ret;
   2121 }
   2122 #endif
   2123 
   2124 #ifdef __LITTLE_ENDIAN__
   2125 __ai int8x8_t vbsl_s8(uint8x8_t __p0, int8x8_t __p1, int8x8_t __p2) {
   2126   int8x8_t __ret;
   2127   __ret = (int8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 0);
   2128   return __ret;
   2129 }
   2130 #else
   2131 __ai int8x8_t vbsl_s8(uint8x8_t __p0, int8x8_t __p1, int8x8_t __p2) {
   2132   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2133   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2134   int8x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
   2135   int8x8_t __ret;
   2136   __ret = (int8x8_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 0);
   2137   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2138   return __ret;
   2139 }
   2140 #endif
   2141 
   2142 #ifdef __LITTLE_ENDIAN__
   2143 __ai float32x2_t vbsl_f32(uint32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
   2144   float32x2_t __ret;
   2145   __ret = (float32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9);
   2146   return __ret;
   2147 }
   2148 #else
   2149 __ai float32x2_t vbsl_f32(uint32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
   2150   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2151   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2152   float32x2_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
   2153   float32x2_t __ret;
   2154   __ret = (float32x2_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9);
   2155   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2156   return __ret;
   2157 }
   2158 #endif
   2159 
   2160 #ifdef __LITTLE_ENDIAN__
   2161 __ai int32x2_t vbsl_s32(uint32x2_t __p0, int32x2_t __p1, int32x2_t __p2) {
   2162   int32x2_t __ret;
   2163   __ret = (int32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2);
   2164   return __ret;
   2165 }
   2166 #else
   2167 __ai int32x2_t vbsl_s32(uint32x2_t __p0, int32x2_t __p1, int32x2_t __p2) {
   2168   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2169   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2170   int32x2_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
   2171   int32x2_t __ret;
   2172   __ret = (int32x2_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2);
   2173   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2174   return __ret;
   2175 }
   2176 #endif
   2177 
   2178 #ifdef __LITTLE_ENDIAN__
   2179 __ai int64x1_t vbsl_s64(uint64x1_t __p0, int64x1_t __p1, int64x1_t __p2) {
   2180   int64x1_t __ret;
   2181   __ret = (int64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 3);
   2182   return __ret;
   2183 }
   2184 #else
   2185 __ai int64x1_t vbsl_s64(uint64x1_t __p0, int64x1_t __p1, int64x1_t __p2) {
   2186   int64x1_t __ret;
   2187   __ret = (int64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 3);
   2188   return __ret;
   2189 }
   2190 #endif
   2191 
   2192 #ifdef __LITTLE_ENDIAN__
   2193 __ai int16x4_t vbsl_s16(uint16x4_t __p0, int16x4_t __p1, int16x4_t __p2) {
   2194   int16x4_t __ret;
   2195   __ret = (int16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1);
   2196   return __ret;
   2197 }
   2198 #else
   2199 __ai int16x4_t vbsl_s16(uint16x4_t __p0, int16x4_t __p1, int16x4_t __p2) {
   2200   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2201   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2202   int16x4_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
   2203   int16x4_t __ret;
   2204   __ret = (int16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 1);
   2205   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2206   return __ret;
   2207 }
   2208 #endif
   2209 
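/* vcage/vcagt/vcale/vcalt (and the *q forms): floating-point absolute compares.
 * Each result lane is all-ones when |__p0| >=, >, <=, or < |__p1| respectively,
 * and all-zeros otherwise. */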
   2210 #ifdef __LITTLE_ENDIAN__
   2211 __ai uint32x4_t vcageq_f32(float32x4_t __p0, float32x4_t __p1) {
   2212   uint32x4_t __ret;
   2213   __ret = (uint32x4_t) __builtin_neon_vcageq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
   2214   return __ret;
   2215 }
   2216 #else
   2217 __ai uint32x4_t vcageq_f32(float32x4_t __p0, float32x4_t __p1) {
   2218   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2219   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2220   uint32x4_t __ret;
   2221   __ret = (uint32x4_t) __builtin_neon_vcageq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
   2222   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2223   return __ret;
   2224 }
   2225 #endif
   2226 
   2227 #ifdef __LITTLE_ENDIAN__
   2228 __ai uint32x2_t vcage_f32(float32x2_t __p0, float32x2_t __p1) {
   2229   uint32x2_t __ret;
   2230   __ret = (uint32x2_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
   2231   return __ret;
   2232 }
   2233 #else
   2234 __ai uint32x2_t vcage_f32(float32x2_t __p0, float32x2_t __p1) {
   2235   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2236   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2237   uint32x2_t __ret;
   2238   __ret = (uint32x2_t) __builtin_neon_vcage_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
   2239   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2240   return __ret;
   2241 }
   2242 #endif
   2243 
   2244 #ifdef __LITTLE_ENDIAN__
   2245 __ai uint32x4_t vcagtq_f32(float32x4_t __p0, float32x4_t __p1) {
   2246   uint32x4_t __ret;
   2247   __ret = (uint32x4_t) __builtin_neon_vcagtq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
   2248   return __ret;
   2249 }
   2250 #else
   2251 __ai uint32x4_t vcagtq_f32(float32x4_t __p0, float32x4_t __p1) {
   2252   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2253   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2254   uint32x4_t __ret;
   2255   __ret = (uint32x4_t) __builtin_neon_vcagtq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
   2256   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2257   return __ret;
   2258 }
   2259 #endif
   2260 
   2261 #ifdef __LITTLE_ENDIAN__
   2262 __ai uint32x2_t vcagt_f32(float32x2_t __p0, float32x2_t __p1) {
   2263   uint32x2_t __ret;
   2264   __ret = (uint32x2_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
   2265   return __ret;
   2266 }
   2267 #else
   2268 __ai uint32x2_t vcagt_f32(float32x2_t __p0, float32x2_t __p1) {
   2269   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2270   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2271   uint32x2_t __ret;
   2272   __ret = (uint32x2_t) __builtin_neon_vcagt_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
   2273   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2274   return __ret;
   2275 }
   2276 #endif
   2277 
   2278 #ifdef __LITTLE_ENDIAN__
   2279 __ai uint32x4_t vcaleq_f32(float32x4_t __p0, float32x4_t __p1) {
   2280   uint32x4_t __ret;
   2281   __ret = (uint32x4_t) __builtin_neon_vcaleq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
   2282   return __ret;
   2283 }
   2284 #else
   2285 __ai uint32x4_t vcaleq_f32(float32x4_t __p0, float32x4_t __p1) {
   2286   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2287   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2288   uint32x4_t __ret;
   2289   __ret = (uint32x4_t) __builtin_neon_vcaleq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
   2290   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2291   return __ret;
   2292 }
   2293 #endif
   2294 
   2295 #ifdef __LITTLE_ENDIAN__
   2296 __ai uint32x2_t vcale_f32(float32x2_t __p0, float32x2_t __p1) {
   2297   uint32x2_t __ret;
   2298   __ret = (uint32x2_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
   2299   return __ret;
   2300 }
   2301 #else
   2302 __ai uint32x2_t vcale_f32(float32x2_t __p0, float32x2_t __p1) {
   2303   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2304   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2305   uint32x2_t __ret;
   2306   __ret = (uint32x2_t) __builtin_neon_vcale_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
   2307   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2308   return __ret;
   2309 }
   2310 #endif
   2311 
   2312 #ifdef __LITTLE_ENDIAN__
   2313 __ai uint32x4_t vcaltq_f32(float32x4_t __p0, float32x4_t __p1) {
   2314   uint32x4_t __ret;
   2315   __ret = (uint32x4_t) __builtin_neon_vcaltq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
   2316   return __ret;
   2317 }
   2318 #else
   2319 __ai uint32x4_t vcaltq_f32(float32x4_t __p0, float32x4_t __p1) {
   2320   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2321   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2322   uint32x4_t __ret;
   2323   __ret = (uint32x4_t) __builtin_neon_vcaltq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
   2324   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2325   return __ret;
   2326 }
   2327 #endif
   2328 
   2329 #ifdef __LITTLE_ENDIAN__
   2330 __ai uint32x2_t vcalt_f32(float32x2_t __p0, float32x2_t __p1) {
   2331   uint32x2_t __ret;
   2332   __ret = (uint32x2_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
   2333   return __ret;
   2334 }
   2335 #else
   2336 __ai uint32x2_t vcalt_f32(float32x2_t __p0, float32x2_t __p1) {
   2337   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2338   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2339   uint32x2_t __ret;
   2340   __ret = (uint32x2_t) __builtin_neon_vcalt_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
   2341   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2342   return __ret;
   2343 }
   2344 #endif
   2345 
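/* vceq/vceqq: compare equal. Each result lane is all-ones when the corresponding
 * lanes of __p0 and __p1 are equal, and all-zeros otherwise. */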
   2346 #ifdef __LITTLE_ENDIAN__
   2347 __ai uint8x8_t vceq_p8(poly8x8_t __p0, poly8x8_t __p1) {
   2348   uint8x8_t __ret;
   2349   __ret = (uint8x8_t)(__p0 == __p1);
   2350   return __ret;
   2351 }
   2352 #else
   2353 __ai uint8x8_t vceq_p8(poly8x8_t __p0, poly8x8_t __p1) {
   2354   poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2355   poly8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2356   uint8x8_t __ret;
   2357   __ret = (uint8x8_t)(__rev0 == __rev1);
   2358   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2359   return __ret;
   2360 }
   2361 #endif
   2362 
   2363 #ifdef __LITTLE_ENDIAN__
   2364 __ai uint8x16_t vceqq_p8(poly8x16_t __p0, poly8x16_t __p1) {
   2365   uint8x16_t __ret;
   2366   __ret = (uint8x16_t)(__p0 == __p1);
   2367   return __ret;
   2368 }
   2369 #else
   2370 __ai uint8x16_t vceqq_p8(poly8x16_t __p0, poly8x16_t __p1) {
   2371   poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2372   poly8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2373   uint8x16_t __ret;
   2374   __ret = (uint8x16_t)(__rev0 == __rev1);
   2375   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2376   return __ret;
   2377 }
   2378 #endif
   2379 
   2380 #ifdef __LITTLE_ENDIAN__
   2381 __ai uint8x16_t vceqq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   2382   uint8x16_t __ret;
   2383   __ret = (uint8x16_t)(__p0 == __p1);
   2384   return __ret;
   2385 }
   2386 #else
   2387 __ai uint8x16_t vceqq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   2388   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2389   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2390   uint8x16_t __ret;
   2391   __ret = (uint8x16_t)(__rev0 == __rev1);
   2392   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2393   return __ret;
   2394 }
   2395 #endif
   2396 
   2397 #ifdef __LITTLE_ENDIAN__
   2398 __ai uint32x4_t vceqq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   2399   uint32x4_t __ret;
   2400   __ret = (uint32x4_t)(__p0 == __p1);
   2401   return __ret;
   2402 }
   2403 #else
   2404 __ai uint32x4_t vceqq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   2405   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2406   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2407   uint32x4_t __ret;
   2408   __ret = (uint32x4_t)(__rev0 == __rev1);
   2409   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2410   return __ret;
   2411 }
   2412 #endif
   2413 
   2414 #ifdef __LITTLE_ENDIAN__
   2415 __ai uint16x8_t vceqq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   2416   uint16x8_t __ret;
   2417   __ret = (uint16x8_t)(__p0 == __p1);
   2418   return __ret;
   2419 }
   2420 #else
   2421 __ai uint16x8_t vceqq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   2422   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2423   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2424   uint16x8_t __ret;
   2425   __ret = (uint16x8_t)(__rev0 == __rev1);
   2426   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2427   return __ret;
   2428 }
   2429 #endif
   2430 
   2431 #ifdef __LITTLE_ENDIAN__
   2432 __ai uint8x16_t vceqq_s8(int8x16_t __p0, int8x16_t __p1) {
   2433   uint8x16_t __ret;
   2434   __ret = (uint8x16_t)(__p0 == __p1);
   2435   return __ret;
   2436 }
   2437 #else
   2438 __ai uint8x16_t vceqq_s8(int8x16_t __p0, int8x16_t __p1) {
   2439   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2440   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2441   uint8x16_t __ret;
   2442   __ret = (uint8x16_t)(__rev0 == __rev1);
   2443   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2444   return __ret;
   2445 }
   2446 #endif
   2447 
   2448 #ifdef __LITTLE_ENDIAN__
   2449 __ai uint32x4_t vceqq_f32(float32x4_t __p0, float32x4_t __p1) {
   2450   uint32x4_t __ret;
   2451   __ret = (uint32x4_t)(__p0 == __p1);
   2452   return __ret;
   2453 }
   2454 #else
   2455 __ai uint32x4_t vceqq_f32(float32x4_t __p0, float32x4_t __p1) {
   2456   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2457   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2458   uint32x4_t __ret;
   2459   __ret = (uint32x4_t)(__rev0 == __rev1);
   2460   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2461   return __ret;
   2462 }
   2463 #endif
   2464 
   2465 #ifdef __LITTLE_ENDIAN__
   2466 __ai uint32x4_t vceqq_s32(int32x4_t __p0, int32x4_t __p1) {
   2467   uint32x4_t __ret;
   2468   __ret = (uint32x4_t)(__p0 == __p1);
   2469   return __ret;
   2470 }
   2471 #else
   2472 __ai uint32x4_t vceqq_s32(int32x4_t __p0, int32x4_t __p1) {
   2473   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2474   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2475   uint32x4_t __ret;
   2476   __ret = (uint32x4_t)(__rev0 == __rev1);
   2477   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2478   return __ret;
   2479 }
   2480 #endif
   2481 
   2482 #ifdef __LITTLE_ENDIAN__
   2483 __ai uint16x8_t vceqq_s16(int16x8_t __p0, int16x8_t __p1) {
   2484   uint16x8_t __ret;
   2485   __ret = (uint16x8_t)(__p0 == __p1);
   2486   return __ret;
   2487 }
   2488 #else
   2489 __ai uint16x8_t vceqq_s16(int16x8_t __p0, int16x8_t __p1) {
   2490   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2491   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2492   uint16x8_t __ret;
   2493   __ret = (uint16x8_t)(__rev0 == __rev1);
   2494   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2495   return __ret;
   2496 }
   2497 #endif
   2498 
   2499 #ifdef __LITTLE_ENDIAN__
   2500 __ai uint8x8_t vceq_u8(uint8x8_t __p0, uint8x8_t __p1) {
   2501   uint8x8_t __ret;
   2502   __ret = (uint8x8_t)(__p0 == __p1);
   2503   return __ret;
   2504 }
   2505 #else
   2506 __ai uint8x8_t vceq_u8(uint8x8_t __p0, uint8x8_t __p1) {
   2507   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2508   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2509   uint8x8_t __ret;
   2510   __ret = (uint8x8_t)(__rev0 == __rev1);
   2511   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2512   return __ret;
   2513 }
   2514 #endif
   2515 
   2516 #ifdef __LITTLE_ENDIAN__
   2517 __ai uint32x2_t vceq_u32(uint32x2_t __p0, uint32x2_t __p1) {
   2518   uint32x2_t __ret;
   2519   __ret = (uint32x2_t)(__p0 == __p1);
   2520   return __ret;
   2521 }
   2522 #else
   2523 __ai uint32x2_t vceq_u32(uint32x2_t __p0, uint32x2_t __p1) {
   2524   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2525   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2526   uint32x2_t __ret;
   2527   __ret = (uint32x2_t)(__rev0 == __rev1);
   2528   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2529   return __ret;
   2530 }
   2531 #endif
   2532 
   2533 #ifdef __LITTLE_ENDIAN__
   2534 __ai uint16x4_t vceq_u16(uint16x4_t __p0, uint16x4_t __p1) {
   2535   uint16x4_t __ret;
   2536   __ret = (uint16x4_t)(__p0 == __p1);
   2537   return __ret;
   2538 }
   2539 #else
   2540 __ai uint16x4_t vceq_u16(uint16x4_t __p0, uint16x4_t __p1) {
   2541   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2542   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2543   uint16x4_t __ret;
   2544   __ret = (uint16x4_t)(__rev0 == __rev1);
   2545   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2546   return __ret;
   2547 }
   2548 #endif
   2549 
   2550 #ifdef __LITTLE_ENDIAN__
   2551 __ai uint8x8_t vceq_s8(int8x8_t __p0, int8x8_t __p1) {
   2552   uint8x8_t __ret;
   2553   __ret = (uint8x8_t)(__p0 == __p1);
   2554   return __ret;
   2555 }
   2556 #else
   2557 __ai uint8x8_t vceq_s8(int8x8_t __p0, int8x8_t __p1) {
   2558   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2559   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2560   uint8x8_t __ret;
   2561   __ret = (uint8x8_t)(__rev0 == __rev1);
   2562   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2563   return __ret;
   2564 }
   2565 #endif
   2566 
   2567 #ifdef __LITTLE_ENDIAN__
   2568 __ai uint32x2_t vceq_f32(float32x2_t __p0, float32x2_t __p1) {
   2569   uint32x2_t __ret;
   2570   __ret = (uint32x2_t)(__p0 == __p1);
   2571   return __ret;
   2572 }
   2573 #else
   2574 __ai uint32x2_t vceq_f32(float32x2_t __p0, float32x2_t __p1) {
   2575   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2576   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2577   uint32x2_t __ret;
   2578   __ret = (uint32x2_t)(__rev0 == __rev1);
   2579   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2580   return __ret;
   2581 }
   2582 #endif
   2583 
   2584 #ifdef __LITTLE_ENDIAN__
   2585 __ai uint32x2_t vceq_s32(int32x2_t __p0, int32x2_t __p1) {
   2586   uint32x2_t __ret;
   2587   __ret = (uint32x2_t)(__p0 == __p1);
   2588   return __ret;
   2589 }
   2590 #else
   2591 __ai uint32x2_t vceq_s32(int32x2_t __p0, int32x2_t __p1) {
   2592   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2593   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2594   uint32x2_t __ret;
   2595   __ret = (uint32x2_t)(__rev0 == __rev1);
   2596   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2597   return __ret;
   2598 }
   2599 #endif
   2600 
   2601 #ifdef __LITTLE_ENDIAN__
   2602 __ai uint16x4_t vceq_s16(int16x4_t __p0, int16x4_t __p1) {
   2603   uint16x4_t __ret;
   2604   __ret = (uint16x4_t)(__p0 == __p1);
   2605   return __ret;
   2606 }
   2607 #else
   2608 __ai uint16x4_t vceq_s16(int16x4_t __p0, int16x4_t __p1) {
   2609   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2610   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2611   uint16x4_t __ret;
   2612   __ret = (uint16x4_t)(__rev0 == __rev1);
   2613   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2614   return __ret;
   2615 }
   2616 #endif
   2617 
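/* vcge/vcgeq: compare greater than or equal. Each result lane is all-ones when
 * the lane of __p0 is >= the lane of __p1, and all-zeros otherwise. */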
   2618 #ifdef __LITTLE_ENDIAN__
   2619 __ai uint8x16_t vcgeq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   2620   uint8x16_t __ret;
   2621   __ret = (uint8x16_t)(__p0 >= __p1);
   2622   return __ret;
   2623 }
   2624 #else
   2625 __ai uint8x16_t vcgeq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   2626   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2627   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2628   uint8x16_t __ret;
   2629   __ret = (uint8x16_t)(__rev0 >= __rev1);
   2630   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2631   return __ret;
   2632 }
   2633 #endif
   2634 
   2635 #ifdef __LITTLE_ENDIAN__
   2636 __ai uint32x4_t vcgeq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   2637   uint32x4_t __ret;
   2638   __ret = (uint32x4_t)(__p0 >= __p1);
   2639   return __ret;
   2640 }
   2641 #else
   2642 __ai uint32x4_t vcgeq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   2643   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2644   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2645   uint32x4_t __ret;
   2646   __ret = (uint32x4_t)(__rev0 >= __rev1);
   2647   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2648   return __ret;
   2649 }
   2650 #endif
   2651 
   2652 #ifdef __LITTLE_ENDIAN__
   2653 __ai uint16x8_t vcgeq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   2654   uint16x8_t __ret;
   2655   __ret = (uint16x8_t)(__p0 >= __p1);
   2656   return __ret;
   2657 }
   2658 #else
   2659 __ai uint16x8_t vcgeq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   2660   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2661   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2662   uint16x8_t __ret;
   2663   __ret = (uint16x8_t)(__rev0 >= __rev1);
   2664   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2665   return __ret;
   2666 }
   2667 #endif
   2668 
   2669 #ifdef __LITTLE_ENDIAN__
   2670 __ai uint8x16_t vcgeq_s8(int8x16_t __p0, int8x16_t __p1) {
   2671   uint8x16_t __ret;
   2672   __ret = (uint8x16_t)(__p0 >= __p1);
   2673   return __ret;
   2674 }
   2675 #else
   2676 __ai uint8x16_t vcgeq_s8(int8x16_t __p0, int8x16_t __p1) {
   2677   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2678   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2679   uint8x16_t __ret;
   2680   __ret = (uint8x16_t)(__rev0 >= __rev1);
   2681   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2682   return __ret;
   2683 }
   2684 #endif
   2685 
   2686 #ifdef __LITTLE_ENDIAN__
   2687 __ai uint32x4_t vcgeq_f32(float32x4_t __p0, float32x4_t __p1) {
   2688   uint32x4_t __ret;
   2689   __ret = (uint32x4_t)(__p0 >= __p1);
   2690   return __ret;
   2691 }
   2692 #else
   2693 __ai uint32x4_t vcgeq_f32(float32x4_t __p0, float32x4_t __p1) {
   2694   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2695   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2696   uint32x4_t __ret;
   2697   __ret = (uint32x4_t)(__rev0 >= __rev1);
   2698   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2699   return __ret;
   2700 }
   2701 #endif
   2702 
   2703 #ifdef __LITTLE_ENDIAN__
   2704 __ai uint32x4_t vcgeq_s32(int32x4_t __p0, int32x4_t __p1) {
   2705   uint32x4_t __ret;
   2706   __ret = (uint32x4_t)(__p0 >= __p1);
   2707   return __ret;
   2708 }
   2709 #else
   2710 __ai uint32x4_t vcgeq_s32(int32x4_t __p0, int32x4_t __p1) {
   2711   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2712   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2713   uint32x4_t __ret;
   2714   __ret = (uint32x4_t)(__rev0 >= __rev1);
   2715   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2716   return __ret;
   2717 }
   2718 #endif
   2719 
   2720 #ifdef __LITTLE_ENDIAN__
   2721 __ai uint16x8_t vcgeq_s16(int16x8_t __p0, int16x8_t __p1) {
   2722   uint16x8_t __ret;
   2723   __ret = (uint16x8_t)(__p0 >= __p1);
   2724   return __ret;
   2725 }
   2726 #else
   2727 __ai uint16x8_t vcgeq_s16(int16x8_t __p0, int16x8_t __p1) {
   2728   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2729   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2730   uint16x8_t __ret;
   2731   __ret = (uint16x8_t)(__rev0 >= __rev1);
   2732   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2733   return __ret;
   2734 }
   2735 #endif
   2736 
   2737 #ifdef __LITTLE_ENDIAN__
   2738 __ai uint8x8_t vcge_u8(uint8x8_t __p0, uint8x8_t __p1) {
   2739   uint8x8_t __ret;
   2740   __ret = (uint8x8_t)(__p0 >= __p1);
   2741   return __ret;
   2742 }
   2743 #else
   2744 __ai uint8x8_t vcge_u8(uint8x8_t __p0, uint8x8_t __p1) {
   2745   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2746   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2747   uint8x8_t __ret;
   2748   __ret = (uint8x8_t)(__rev0 >= __rev1);
   2749   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2750   return __ret;
   2751 }
   2752 #endif
   2753 
   2754 #ifdef __LITTLE_ENDIAN__
   2755 __ai uint32x2_t vcge_u32(uint32x2_t __p0, uint32x2_t __p1) {
   2756   uint32x2_t __ret;
   2757   __ret = (uint32x2_t)(__p0 >= __p1);
   2758   return __ret;
   2759 }
   2760 #else
   2761 __ai uint32x2_t vcge_u32(uint32x2_t __p0, uint32x2_t __p1) {
   2762   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2763   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2764   uint32x2_t __ret;
   2765   __ret = (uint32x2_t)(__rev0 >= __rev1);
   2766   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2767   return __ret;
   2768 }
   2769 #endif
   2770 
   2771 #ifdef __LITTLE_ENDIAN__
   2772 __ai uint16x4_t vcge_u16(uint16x4_t __p0, uint16x4_t __p1) {
   2773   uint16x4_t __ret;
   2774   __ret = (uint16x4_t)(__p0 >= __p1);
   2775   return __ret;
   2776 }
   2777 #else
   2778 __ai uint16x4_t vcge_u16(uint16x4_t __p0, uint16x4_t __p1) {
   2779   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2780   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2781   uint16x4_t __ret;
   2782   __ret = (uint16x4_t)(__rev0 >= __rev1);
   2783   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2784   return __ret;
   2785 }
   2786 #endif
   2787 
   2788 #ifdef __LITTLE_ENDIAN__
   2789 __ai uint8x8_t vcge_s8(int8x8_t __p0, int8x8_t __p1) {
   2790   uint8x8_t __ret;
   2791   __ret = (uint8x8_t)(__p0 >= __p1);
   2792   return __ret;
   2793 }
   2794 #else
   2795 __ai uint8x8_t vcge_s8(int8x8_t __p0, int8x8_t __p1) {
   2796   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2797   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2798   uint8x8_t __ret;
   2799   __ret = (uint8x8_t)(__rev0 >= __rev1);
   2800   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2801   return __ret;
   2802 }
   2803 #endif
   2804 
   2805 #ifdef __LITTLE_ENDIAN__
   2806 __ai uint32x2_t vcge_f32(float32x2_t __p0, float32x2_t __p1) {
   2807   uint32x2_t __ret;
   2808   __ret = (uint32x2_t)(__p0 >= __p1);
   2809   return __ret;
   2810 }
   2811 #else
   2812 __ai uint32x2_t vcge_f32(float32x2_t __p0, float32x2_t __p1) {
   2813   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2814   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2815   uint32x2_t __ret;
   2816   __ret = (uint32x2_t)(__rev0 >= __rev1);
   2817   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2818   return __ret;
   2819 }
   2820 #endif
   2821 
   2822 #ifdef __LITTLE_ENDIAN__
   2823 __ai uint32x2_t vcge_s32(int32x2_t __p0, int32x2_t __p1) {
   2824   uint32x2_t __ret;
   2825   __ret = (uint32x2_t)(__p0 >= __p1);
   2826   return __ret;
   2827 }
   2828 #else
   2829 __ai uint32x2_t vcge_s32(int32x2_t __p0, int32x2_t __p1) {
   2830   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   2831   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   2832   uint32x2_t __ret;
   2833   __ret = (uint32x2_t)(__rev0 >= __rev1);
   2834   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   2835   return __ret;
   2836 }
   2837 #endif
   2838 
   2839 #ifdef __LITTLE_ENDIAN__
   2840 __ai uint16x4_t vcge_s16(int16x4_t __p0, int16x4_t __p1) {
   2841   uint16x4_t __ret;
   2842   __ret = (uint16x4_t)(__p0 >= __p1);
   2843   return __ret;
   2844 }
   2845 #else
   2846 __ai uint16x4_t vcge_s16(int16x4_t __p0, int16x4_t __p1) {
   2847   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2848   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2849   uint16x4_t __ret;
   2850   __ret = (uint16x4_t)(__rev0 >= __rev1);
   2851   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2852   return __ret;
   2853 }
   2854 #endif
   2855 
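/* vcgt* -- per-lane "greater than"; same pattern as vcge* above, using the C
 * vector operator > instead of >=. */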
   2856 #ifdef __LITTLE_ENDIAN__
   2857 __ai uint8x16_t vcgtq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   2858   uint8x16_t __ret;
   2859   __ret = (uint8x16_t)(__p0 > __p1);
   2860   return __ret;
   2861 }
   2862 #else
   2863 __ai uint8x16_t vcgtq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   2864   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2865   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2866   uint8x16_t __ret;
   2867   __ret = (uint8x16_t)(__rev0 > __rev1);
   2868   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2869   return __ret;
   2870 }
   2871 #endif
   2872 
   2873 #ifdef __LITTLE_ENDIAN__
   2874 __ai uint32x4_t vcgtq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   2875   uint32x4_t __ret;
   2876   __ret = (uint32x4_t)(__p0 > __p1);
   2877   return __ret;
   2878 }
   2879 #else
   2880 __ai uint32x4_t vcgtq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   2881   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2882   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2883   uint32x4_t __ret;
   2884   __ret = (uint32x4_t)(__rev0 > __rev1);
   2885   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2886   return __ret;
   2887 }
   2888 #endif
   2889 
   2890 #ifdef __LITTLE_ENDIAN__
   2891 __ai uint16x8_t vcgtq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   2892   uint16x8_t __ret;
   2893   __ret = (uint16x8_t)(__p0 > __p1);
   2894   return __ret;
   2895 }
   2896 #else
   2897 __ai uint16x8_t vcgtq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   2898   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2899   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2900   uint16x8_t __ret;
   2901   __ret = (uint16x8_t)(__rev0 > __rev1);
   2902   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2903   return __ret;
   2904 }
   2905 #endif
   2906 
   2907 #ifdef __LITTLE_ENDIAN__
   2908 __ai uint8x16_t vcgtq_s8(int8x16_t __p0, int8x16_t __p1) {
   2909   uint8x16_t __ret;
   2910   __ret = (uint8x16_t)(__p0 > __p1);
   2911   return __ret;
   2912 }
   2913 #else
   2914 __ai uint8x16_t vcgtq_s8(int8x16_t __p0, int8x16_t __p1) {
   2915   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2916   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2917   uint8x16_t __ret;
   2918   __ret = (uint8x16_t)(__rev0 > __rev1);
   2919   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   2920   return __ret;
   2921 }
   2922 #endif
   2923 
   2924 #ifdef __LITTLE_ENDIAN__
   2925 __ai uint32x4_t vcgtq_f32(float32x4_t __p0, float32x4_t __p1) {
   2926   uint32x4_t __ret;
   2927   __ret = (uint32x4_t)(__p0 > __p1);
   2928   return __ret;
   2929 }
   2930 #else
   2931 __ai uint32x4_t vcgtq_f32(float32x4_t __p0, float32x4_t __p1) {
   2932   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2933   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2934   uint32x4_t __ret;
   2935   __ret = (uint32x4_t)(__rev0 > __rev1);
   2936   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2937   return __ret;
   2938 }
   2939 #endif
   2940 
   2941 #ifdef __LITTLE_ENDIAN__
   2942 __ai uint32x4_t vcgtq_s32(int32x4_t __p0, int32x4_t __p1) {
   2943   uint32x4_t __ret;
   2944   __ret = (uint32x4_t)(__p0 > __p1);
   2945   return __ret;
   2946 }
   2947 #else
   2948 __ai uint32x4_t vcgtq_s32(int32x4_t __p0, int32x4_t __p1) {
   2949   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   2950   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   2951   uint32x4_t __ret;
   2952   __ret = (uint32x4_t)(__rev0 > __rev1);
   2953   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   2954   return __ret;
   2955 }
   2956 #endif
   2957 
   2958 #ifdef __LITTLE_ENDIAN__
   2959 __ai uint16x8_t vcgtq_s16(int16x8_t __p0, int16x8_t __p1) {
   2960   uint16x8_t __ret;
   2961   __ret = (uint16x8_t)(__p0 > __p1);
   2962   return __ret;
   2963 }
   2964 #else
   2965 __ai uint16x8_t vcgtq_s16(int16x8_t __p0, int16x8_t __p1) {
   2966   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2967   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2968   uint16x8_t __ret;
   2969   __ret = (uint16x8_t)(__rev0 > __rev1);
   2970   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2971   return __ret;
   2972 }
   2973 #endif
   2974 
   2975 #ifdef __LITTLE_ENDIAN__
   2976 __ai uint8x8_t vcgt_u8(uint8x8_t __p0, uint8x8_t __p1) {
   2977   uint8x8_t __ret;
   2978   __ret = (uint8x8_t)(__p0 > __p1);
   2979   return __ret;
   2980 }
   2981 #else
   2982 __ai uint8x8_t vcgt_u8(uint8x8_t __p0, uint8x8_t __p1) {
   2983   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   2984   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   2985   uint8x8_t __ret;
   2986   __ret = (uint8x8_t)(__rev0 > __rev1);
   2987   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   2988   return __ret;
   2989 }
   2990 #endif
   2991 
   2992 #ifdef __LITTLE_ENDIAN__
   2993 __ai uint32x2_t vcgt_u32(uint32x2_t __p0, uint32x2_t __p1) {
   2994   uint32x2_t __ret;
   2995   __ret = (uint32x2_t)(__p0 > __p1);
   2996   return __ret;
   2997 }
   2998 #else
   2999 __ai uint32x2_t vcgt_u32(uint32x2_t __p0, uint32x2_t __p1) {
   3000   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3001   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   3002   uint32x2_t __ret;
   3003   __ret = (uint32x2_t)(__rev0 > __rev1);
   3004   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3005   return __ret;
   3006 }
   3007 #endif
   3008 
   3009 #ifdef __LITTLE_ENDIAN__
   3010 __ai uint16x4_t vcgt_u16(uint16x4_t __p0, uint16x4_t __p1) {
   3011   uint16x4_t __ret;
   3012   __ret = (uint16x4_t)(__p0 > __p1);
   3013   return __ret;
   3014 }
   3015 #else
   3016 __ai uint16x4_t vcgt_u16(uint16x4_t __p0, uint16x4_t __p1) {
   3017   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3018   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3019   uint16x4_t __ret;
   3020   __ret = (uint16x4_t)(__rev0 > __rev1);
   3021   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3022   return __ret;
   3023 }
   3024 #endif
   3025 
   3026 #ifdef __LITTLE_ENDIAN__
   3027 __ai uint8x8_t vcgt_s8(int8x8_t __p0, int8x8_t __p1) {
   3028   uint8x8_t __ret;
   3029   __ret = (uint8x8_t)(__p0 > __p1);
   3030   return __ret;
   3031 }
   3032 #else
   3033 __ai uint8x8_t vcgt_s8(int8x8_t __p0, int8x8_t __p1) {
   3034   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3035   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3036   uint8x8_t __ret;
   3037   __ret = (uint8x8_t)(__rev0 > __rev1);
   3038   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3039   return __ret;
   3040 }
   3041 #endif
   3042 
   3043 #ifdef __LITTLE_ENDIAN__
   3044 __ai uint32x2_t vcgt_f32(float32x2_t __p0, float32x2_t __p1) {
   3045   uint32x2_t __ret;
   3046   __ret = (uint32x2_t)(__p0 > __p1);
   3047   return __ret;
   3048 }
   3049 #else
   3050 __ai uint32x2_t vcgt_f32(float32x2_t __p0, float32x2_t __p1) {
   3051   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3052   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   3053   uint32x2_t __ret;
   3054   __ret = (uint32x2_t)(__rev0 > __rev1);
   3055   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3056   return __ret;
   3057 }
   3058 #endif
   3059 
   3060 #ifdef __LITTLE_ENDIAN__
   3061 __ai uint32x2_t vcgt_s32(int32x2_t __p0, int32x2_t __p1) {
   3062   uint32x2_t __ret;
   3063   __ret = (uint32x2_t)(__p0 > __p1);
   3064   return __ret;
   3065 }
   3066 #else
   3067 __ai uint32x2_t vcgt_s32(int32x2_t __p0, int32x2_t __p1) {
   3068   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3069   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   3070   uint32x2_t __ret;
   3071   __ret = (uint32x2_t)(__rev0 > __rev1);
   3072   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3073   return __ret;
   3074 }
   3075 #endif
   3076 
   3077 #ifdef __LITTLE_ENDIAN__
   3078 __ai uint16x4_t vcgt_s16(int16x4_t __p0, int16x4_t __p1) {
   3079   uint16x4_t __ret;
   3080   __ret = (uint16x4_t)(__p0 > __p1);
   3081   return __ret;
   3082 }
   3083 #else
   3084 __ai uint16x4_t vcgt_s16(int16x4_t __p0, int16x4_t __p1) {
   3085   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3086   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3087   uint16x4_t __ret;
   3088   __ret = (uint16x4_t)(__rev0 > __rev1);
   3089   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3090   return __ret;
   3091 }
   3092 #endif
   3093 
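/* vcle* -- per-lane "less than or equal"; result lanes are all ones where
 * __p0[i] <= __p1[i].  For the float variants the comparison follows ordinary
 * C semantics, so a lane involving NaN compares false (all zeros). */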
   3094 #ifdef __LITTLE_ENDIAN__
   3095 __ai uint8x16_t vcleq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   3096   uint8x16_t __ret;
   3097   __ret = (uint8x16_t)(__p0 <= __p1);
   3098   return __ret;
   3099 }
   3100 #else
   3101 __ai uint8x16_t vcleq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   3102   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3103   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3104   uint8x16_t __ret;
   3105   __ret = (uint8x16_t)(__rev0 <= __rev1);
   3106   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3107   return __ret;
   3108 }
   3109 #endif
   3110 
   3111 #ifdef __LITTLE_ENDIAN__
   3112 __ai uint32x4_t vcleq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   3113   uint32x4_t __ret;
   3114   __ret = (uint32x4_t)(__p0 <= __p1);
   3115   return __ret;
   3116 }
   3117 #else
   3118 __ai uint32x4_t vcleq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   3119   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3120   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3121   uint32x4_t __ret;
   3122   __ret = (uint32x4_t)(__rev0 <= __rev1);
   3123   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3124   return __ret;
   3125 }
   3126 #endif
   3127 
   3128 #ifdef __LITTLE_ENDIAN__
   3129 __ai uint16x8_t vcleq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   3130   uint16x8_t __ret;
   3131   __ret = (uint16x8_t)(__p0 <= __p1);
   3132   return __ret;
   3133 }
   3134 #else
   3135 __ai uint16x8_t vcleq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   3136   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3137   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3138   uint16x8_t __ret;
   3139   __ret = (uint16x8_t)(__rev0 <= __rev1);
   3140   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3141   return __ret;
   3142 }
   3143 #endif
   3144 
   3145 #ifdef __LITTLE_ENDIAN__
   3146 __ai uint8x16_t vcleq_s8(int8x16_t __p0, int8x16_t __p1) {
   3147   uint8x16_t __ret;
   3148   __ret = (uint8x16_t)(__p0 <= __p1);
   3149   return __ret;
   3150 }
   3151 #else
   3152 __ai uint8x16_t vcleq_s8(int8x16_t __p0, int8x16_t __p1) {
   3153   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3154   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3155   uint8x16_t __ret;
   3156   __ret = (uint8x16_t)(__rev0 <= __rev1);
   3157   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3158   return __ret;
   3159 }
   3160 #endif
   3161 
   3162 #ifdef __LITTLE_ENDIAN__
   3163 __ai uint32x4_t vcleq_f32(float32x4_t __p0, float32x4_t __p1) {
   3164   uint32x4_t __ret;
   3165   __ret = (uint32x4_t)(__p0 <= __p1);
   3166   return __ret;
   3167 }
   3168 #else
   3169 __ai uint32x4_t vcleq_f32(float32x4_t __p0, float32x4_t __p1) {
   3170   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3171   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3172   uint32x4_t __ret;
   3173   __ret = (uint32x4_t)(__rev0 <= __rev1);
   3174   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3175   return __ret;
   3176 }
   3177 #endif
   3178 
   3179 #ifdef __LITTLE_ENDIAN__
   3180 __ai uint32x4_t vcleq_s32(int32x4_t __p0, int32x4_t __p1) {
   3181   uint32x4_t __ret;
   3182   __ret = (uint32x4_t)(__p0 <= __p1);
   3183   return __ret;
   3184 }
   3185 #else
   3186 __ai uint32x4_t vcleq_s32(int32x4_t __p0, int32x4_t __p1) {
   3187   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3188   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3189   uint32x4_t __ret;
   3190   __ret = (uint32x4_t)(__rev0 <= __rev1);
   3191   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3192   return __ret;
   3193 }
   3194 #endif
   3195 
   3196 #ifdef __LITTLE_ENDIAN__
   3197 __ai uint16x8_t vcleq_s16(int16x8_t __p0, int16x8_t __p1) {
   3198   uint16x8_t __ret;
   3199   __ret = (uint16x8_t)(__p0 <= __p1);
   3200   return __ret;
   3201 }
   3202 #else
   3203 __ai uint16x8_t vcleq_s16(int16x8_t __p0, int16x8_t __p1) {
   3204   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3205   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3206   uint16x8_t __ret;
   3207   __ret = (uint16x8_t)(__rev0 <= __rev1);
   3208   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3209   return __ret;
   3210 }
   3211 #endif
   3212 
   3213 #ifdef __LITTLE_ENDIAN__
   3214 __ai uint8x8_t vcle_u8(uint8x8_t __p0, uint8x8_t __p1) {
   3215   uint8x8_t __ret;
   3216   __ret = (uint8x8_t)(__p0 <= __p1);
   3217   return __ret;
   3218 }
   3219 #else
   3220 __ai uint8x8_t vcle_u8(uint8x8_t __p0, uint8x8_t __p1) {
   3221   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3222   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3223   uint8x8_t __ret;
   3224   __ret = (uint8x8_t)(__rev0 <= __rev1);
   3225   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3226   return __ret;
   3227 }
   3228 #endif
   3229 
   3230 #ifdef __LITTLE_ENDIAN__
   3231 __ai uint32x2_t vcle_u32(uint32x2_t __p0, uint32x2_t __p1) {
   3232   uint32x2_t __ret;
   3233   __ret = (uint32x2_t)(__p0 <= __p1);
   3234   return __ret;
   3235 }
   3236 #else
   3237 __ai uint32x2_t vcle_u32(uint32x2_t __p0, uint32x2_t __p1) {
   3238   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3239   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   3240   uint32x2_t __ret;
   3241   __ret = (uint32x2_t)(__rev0 <= __rev1);
   3242   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3243   return __ret;
   3244 }
   3245 #endif
   3246 
   3247 #ifdef __LITTLE_ENDIAN__
   3248 __ai uint16x4_t vcle_u16(uint16x4_t __p0, uint16x4_t __p1) {
   3249   uint16x4_t __ret;
   3250   __ret = (uint16x4_t)(__p0 <= __p1);
   3251   return __ret;
   3252 }
   3253 #else
   3254 __ai uint16x4_t vcle_u16(uint16x4_t __p0, uint16x4_t __p1) {
   3255   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3256   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3257   uint16x4_t __ret;
   3258   __ret = (uint16x4_t)(__rev0 <= __rev1);
   3259   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3260   return __ret;
   3261 }
   3262 #endif
   3263 
   3264 #ifdef __LITTLE_ENDIAN__
   3265 __ai uint8x8_t vcle_s8(int8x8_t __p0, int8x8_t __p1) {
   3266   uint8x8_t __ret;
   3267   __ret = (uint8x8_t)(__p0 <= __p1);
   3268   return __ret;
   3269 }
   3270 #else
   3271 __ai uint8x8_t vcle_s8(int8x8_t __p0, int8x8_t __p1) {
   3272   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3273   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3274   uint8x8_t __ret;
   3275   __ret = (uint8x8_t)(__rev0 <= __rev1);
   3276   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3277   return __ret;
   3278 }
   3279 #endif
   3280 
   3281 #ifdef __LITTLE_ENDIAN__
   3282 __ai uint32x2_t vcle_f32(float32x2_t __p0, float32x2_t __p1) {
   3283   uint32x2_t __ret;
   3284   __ret = (uint32x2_t)(__p0 <= __p1);
   3285   return __ret;
   3286 }
   3287 #else
   3288 __ai uint32x2_t vcle_f32(float32x2_t __p0, float32x2_t __p1) {
   3289   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3290   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   3291   uint32x2_t __ret;
   3292   __ret = (uint32x2_t)(__rev0 <= __rev1);
   3293   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3294   return __ret;
   3295 }
   3296 #endif
   3297 
   3298 #ifdef __LITTLE_ENDIAN__
   3299 __ai uint32x2_t vcle_s32(int32x2_t __p0, int32x2_t __p1) {
   3300   uint32x2_t __ret;
   3301   __ret = (uint32x2_t)(__p0 <= __p1);
   3302   return __ret;
   3303 }
   3304 #else
   3305 __ai uint32x2_t vcle_s32(int32x2_t __p0, int32x2_t __p1) {
   3306   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3307   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   3308   uint32x2_t __ret;
   3309   __ret = (uint32x2_t)(__rev0 <= __rev1);
   3310   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3311   return __ret;
   3312 }
   3313 #endif
   3314 
   3315 #ifdef __LITTLE_ENDIAN__
   3316 __ai uint16x4_t vcle_s16(int16x4_t __p0, int16x4_t __p1) {
   3317   uint16x4_t __ret;
   3318   __ret = (uint16x4_t)(__p0 <= __p1);
   3319   return __ret;
   3320 }
   3321 #else
   3322 __ai uint16x4_t vcle_s16(int16x4_t __p0, int16x4_t __p1) {
   3323   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3324   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3325   uint16x4_t __ret;
   3326   __ret = (uint16x4_t)(__rev0 <= __rev1);
   3327   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3328   return __ret;
   3329 }
   3330 #endif
   3331 
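/* vcls* -- per-lane "count leading sign bits": the number of bits following the
 * sign bit that match it (the sign bit itself is not counted), routed through
 * __builtin_neon_vcls*_v with a numeric type code selecting the lane type.
 * For int8 lanes, for example, vcls of 0 and of -1 is 7, and vcls of 1 is 6. */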
   3332 #ifdef __LITTLE_ENDIAN__
   3333 __ai int8x16_t vclsq_s8(int8x16_t __p0) {
   3334   int8x16_t __ret;
   3335   __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 32);
   3336   return __ret;
   3337 }
   3338 #else
   3339 __ai int8x16_t vclsq_s8(int8x16_t __p0) {
   3340   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3341   int8x16_t __ret;
   3342   __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 32);
   3343   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3344   return __ret;
   3345 }
   3346 #endif
   3347 
   3348 #ifdef __LITTLE_ENDIAN__
   3349 __ai int32x4_t vclsq_s32(int32x4_t __p0) {
   3350   int32x4_t __ret;
   3351   __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 34);
   3352   return __ret;
   3353 }
   3354 #else
   3355 __ai int32x4_t vclsq_s32(int32x4_t __p0) {
   3356   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3357   int32x4_t __ret;
   3358   __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 34);
   3359   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3360   return __ret;
   3361 }
   3362 #endif
   3363 
   3364 #ifdef __LITTLE_ENDIAN__
   3365 __ai int16x8_t vclsq_s16(int16x8_t __p0) {
   3366   int16x8_t __ret;
   3367   __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 33);
   3368   return __ret;
   3369 }
   3370 #else
   3371 __ai int16x8_t vclsq_s16(int16x8_t __p0) {
   3372   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3373   int16x8_t __ret;
   3374   __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 33);
   3375   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3376   return __ret;
   3377 }
   3378 #endif
   3379 
   3380 #ifdef __LITTLE_ENDIAN__
   3381 __ai int8x8_t vcls_s8(int8x8_t __p0) {
   3382   int8x8_t __ret;
   3383   __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__p0, 0);
   3384   return __ret;
   3385 }
   3386 #else
   3387 __ai int8x8_t vcls_s8(int8x8_t __p0) {
   3388   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3389   int8x8_t __ret;
   3390   __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 0);
   3391   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3392   return __ret;
   3393 }
   3394 #endif
   3395 
   3396 #ifdef __LITTLE_ENDIAN__
   3397 __ai int32x2_t vcls_s32(int32x2_t __p0) {
   3398   int32x2_t __ret;
   3399   __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__p0, 2);
   3400   return __ret;
   3401 }
   3402 #else
   3403 __ai int32x2_t vcls_s32(int32x2_t __p0) {
   3404   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3405   int32x2_t __ret;
   3406   __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 2);
   3407   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3408   return __ret;
   3409 }
   3410 #endif
   3411 
   3412 #ifdef __LITTLE_ENDIAN__
   3413 __ai int16x4_t vcls_s16(int16x4_t __p0) {
   3414   int16x4_t __ret;
   3415   __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__p0, 1);
   3416   return __ret;
   3417 }
   3418 #else
   3419 __ai int16x4_t vcls_s16(int16x4_t __p0) {
   3420   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3421   int16x4_t __ret;
   3422   __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 1);
   3423   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3424   return __ret;
   3425 }
   3426 #endif
   3427 
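/* vclt* -- per-lane "less than", completing the ordered-comparison set. */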
   3428 #ifdef __LITTLE_ENDIAN__
   3429 __ai uint8x16_t vcltq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   3430   uint8x16_t __ret;
   3431   __ret = (uint8x16_t)(__p0 < __p1);
   3432   return __ret;
   3433 }
   3434 #else
   3435 __ai uint8x16_t vcltq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   3436   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3437   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3438   uint8x16_t __ret;
   3439   __ret = (uint8x16_t)(__rev0 < __rev1);
   3440   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3441   return __ret;
   3442 }
   3443 #endif
   3444 
   3445 #ifdef __LITTLE_ENDIAN__
   3446 __ai uint32x4_t vcltq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   3447   uint32x4_t __ret;
   3448   __ret = (uint32x4_t)(__p0 < __p1);
   3449   return __ret;
   3450 }
   3451 #else
   3452 __ai uint32x4_t vcltq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   3453   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3454   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3455   uint32x4_t __ret;
   3456   __ret = (uint32x4_t)(__rev0 < __rev1);
   3457   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3458   return __ret;
   3459 }
   3460 #endif
   3461 
   3462 #ifdef __LITTLE_ENDIAN__
   3463 __ai uint16x8_t vcltq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   3464   uint16x8_t __ret;
   3465   __ret = (uint16x8_t)(__p0 < __p1);
   3466   return __ret;
   3467 }
   3468 #else
   3469 __ai uint16x8_t vcltq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   3470   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3471   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3472   uint16x8_t __ret;
   3473   __ret = (uint16x8_t)(__rev0 < __rev1);
   3474   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3475   return __ret;
   3476 }
   3477 #endif
   3478 
   3479 #ifdef __LITTLE_ENDIAN__
   3480 __ai uint8x16_t vcltq_s8(int8x16_t __p0, int8x16_t __p1) {
   3481   uint8x16_t __ret;
   3482   __ret = (uint8x16_t)(__p0 < __p1);
   3483   return __ret;
   3484 }
   3485 #else
   3486 __ai uint8x16_t vcltq_s8(int8x16_t __p0, int8x16_t __p1) {
   3487   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3488   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3489   uint8x16_t __ret;
   3490   __ret = (uint8x16_t)(__rev0 < __rev1);
   3491   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3492   return __ret;
   3493 }
   3494 #endif
   3495 
   3496 #ifdef __LITTLE_ENDIAN__
   3497 __ai uint32x4_t vcltq_f32(float32x4_t __p0, float32x4_t __p1) {
   3498   uint32x4_t __ret;
   3499   __ret = (uint32x4_t)(__p0 < __p1);
   3500   return __ret;
   3501 }
   3502 #else
   3503 __ai uint32x4_t vcltq_f32(float32x4_t __p0, float32x4_t __p1) {
   3504   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3505   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3506   uint32x4_t __ret;
   3507   __ret = (uint32x4_t)(__rev0 < __rev1);
   3508   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3509   return __ret;
   3510 }
   3511 #endif
   3512 
   3513 #ifdef __LITTLE_ENDIAN__
   3514 __ai uint32x4_t vcltq_s32(int32x4_t __p0, int32x4_t __p1) {
   3515   uint32x4_t __ret;
   3516   __ret = (uint32x4_t)(__p0 < __p1);
   3517   return __ret;
   3518 }
   3519 #else
   3520 __ai uint32x4_t vcltq_s32(int32x4_t __p0, int32x4_t __p1) {
   3521   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3522   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3523   uint32x4_t __ret;
   3524   __ret = (uint32x4_t)(__rev0 < __rev1);
   3525   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3526   return __ret;
   3527 }
   3528 #endif
   3529 
   3530 #ifdef __LITTLE_ENDIAN__
   3531 __ai uint16x8_t vcltq_s16(int16x8_t __p0, int16x8_t __p1) {
   3532   uint16x8_t __ret;
   3533   __ret = (uint16x8_t)(__p0 < __p1);
   3534   return __ret;
   3535 }
   3536 #else
   3537 __ai uint16x8_t vcltq_s16(int16x8_t __p0, int16x8_t __p1) {
   3538   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3539   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3540   uint16x8_t __ret;
   3541   __ret = (uint16x8_t)(__rev0 < __rev1);
   3542   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3543   return __ret;
   3544 }
   3545 #endif
   3546 
   3547 #ifdef __LITTLE_ENDIAN__
   3548 __ai uint8x8_t vclt_u8(uint8x8_t __p0, uint8x8_t __p1) {
   3549   uint8x8_t __ret;
   3550   __ret = (uint8x8_t)(__p0 < __p1);
   3551   return __ret;
   3552 }
   3553 #else
   3554 __ai uint8x8_t vclt_u8(uint8x8_t __p0, uint8x8_t __p1) {
   3555   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3556   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3557   uint8x8_t __ret;
   3558   __ret = (uint8x8_t)(__rev0 < __rev1);
   3559   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3560   return __ret;
   3561 }
   3562 #endif
   3563 
   3564 #ifdef __LITTLE_ENDIAN__
   3565 __ai uint32x2_t vclt_u32(uint32x2_t __p0, uint32x2_t __p1) {
   3566   uint32x2_t __ret;
   3567   __ret = (uint32x2_t)(__p0 < __p1);
   3568   return __ret;
   3569 }
   3570 #else
   3571 __ai uint32x2_t vclt_u32(uint32x2_t __p0, uint32x2_t __p1) {
   3572   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3573   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   3574   uint32x2_t __ret;
   3575   __ret = (uint32x2_t)(__rev0 < __rev1);
   3576   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3577   return __ret;
   3578 }
   3579 #endif
   3580 
   3581 #ifdef __LITTLE_ENDIAN__
   3582 __ai uint16x4_t vclt_u16(uint16x4_t __p0, uint16x4_t __p1) {
   3583   uint16x4_t __ret;
   3584   __ret = (uint16x4_t)(__p0 < __p1);
   3585   return __ret;
   3586 }
   3587 #else
   3588 __ai uint16x4_t vclt_u16(uint16x4_t __p0, uint16x4_t __p1) {
   3589   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3590   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3591   uint16x4_t __ret;
   3592   __ret = (uint16x4_t)(__rev0 < __rev1);
   3593   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3594   return __ret;
   3595 }
   3596 #endif
   3597 
   3598 #ifdef __LITTLE_ENDIAN__
   3599 __ai uint8x8_t vclt_s8(int8x8_t __p0, int8x8_t __p1) {
   3600   uint8x8_t __ret;
   3601   __ret = (uint8x8_t)(__p0 < __p1);
   3602   return __ret;
   3603 }
   3604 #else
   3605 __ai uint8x8_t vclt_s8(int8x8_t __p0, int8x8_t __p1) {
   3606   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3607   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3608   uint8x8_t __ret;
   3609   __ret = (uint8x8_t)(__rev0 < __rev1);
   3610   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3611   return __ret;
   3612 }
   3613 #endif
   3614 
   3615 #ifdef __LITTLE_ENDIAN__
   3616 __ai uint32x2_t vclt_f32(float32x2_t __p0, float32x2_t __p1) {
   3617   uint32x2_t __ret;
   3618   __ret = (uint32x2_t)(__p0 < __p1);
   3619   return __ret;
   3620 }
   3621 #else
   3622 __ai uint32x2_t vclt_f32(float32x2_t __p0, float32x2_t __p1) {
   3623   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3624   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   3625   uint32x2_t __ret;
   3626   __ret = (uint32x2_t)(__rev0 < __rev1);
   3627   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3628   return __ret;
   3629 }
   3630 #endif
   3631 
   3632 #ifdef __LITTLE_ENDIAN__
   3633 __ai uint32x2_t vclt_s32(int32x2_t __p0, int32x2_t __p1) {
   3634   uint32x2_t __ret;
   3635   __ret = (uint32x2_t)(__p0 < __p1);
   3636   return __ret;
   3637 }
   3638 #else
   3639 __ai uint32x2_t vclt_s32(int32x2_t __p0, int32x2_t __p1) {
   3640   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3641   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   3642   uint32x2_t __ret;
   3643   __ret = (uint32x2_t)(__rev0 < __rev1);
   3644   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3645   return __ret;
   3646 }
   3647 #endif
   3648 
   3649 #ifdef __LITTLE_ENDIAN__
   3650 __ai uint16x4_t vclt_s16(int16x4_t __p0, int16x4_t __p1) {
   3651   uint16x4_t __ret;
   3652   __ret = (uint16x4_t)(__p0 < __p1);
   3653   return __ret;
   3654 }
   3655 #else
   3656 __ai uint16x4_t vclt_s16(int16x4_t __p0, int16x4_t __p1) {
   3657   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3658   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3659   uint16x4_t __ret;
   3660   __ret = (uint16x4_t)(__rev0 < __rev1);
   3661   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3662   return __ret;
   3663 }
   3664 #endif
   3665 
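/* vclz* -- per-lane "count leading zeros"; e.g. a uint32 lane holding 1 yields
 * 31.  As with vcls*, the last argument to the builtin is an internal type code
 * identifying the lane type, not a value that users pass. */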
   3666 #ifdef __LITTLE_ENDIAN__
   3667 __ai uint8x16_t vclzq_u8(uint8x16_t __p0) {
   3668   uint8x16_t __ret;
   3669   __ret = (uint8x16_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 48);
   3670   return __ret;
   3671 }
   3672 #else
   3673 __ai uint8x16_t vclzq_u8(uint8x16_t __p0) {
   3674   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3675   uint8x16_t __ret;
   3676   __ret = (uint8x16_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 48);
   3677   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3678   return __ret;
   3679 }
   3680 #endif
   3681 
   3682 #ifdef __LITTLE_ENDIAN__
   3683 __ai uint32x4_t vclzq_u32(uint32x4_t __p0) {
   3684   uint32x4_t __ret;
   3685   __ret = (uint32x4_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 50);
   3686   return __ret;
   3687 }
   3688 #else
   3689 __ai uint32x4_t vclzq_u32(uint32x4_t __p0) {
   3690   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3691   uint32x4_t __ret;
   3692   __ret = (uint32x4_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 50);
   3693   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3694   return __ret;
   3695 }
   3696 #endif
   3697 
   3698 #ifdef __LITTLE_ENDIAN__
   3699 __ai uint16x8_t vclzq_u16(uint16x8_t __p0) {
   3700   uint16x8_t __ret;
   3701   __ret = (uint16x8_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 49);
   3702   return __ret;
   3703 }
   3704 #else
   3705 __ai uint16x8_t vclzq_u16(uint16x8_t __p0) {
   3706   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3707   uint16x8_t __ret;
   3708   __ret = (uint16x8_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 49);
   3709   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3710   return __ret;
   3711 }
   3712 #endif
   3713 
   3714 #ifdef __LITTLE_ENDIAN__
   3715 __ai int8x16_t vclzq_s8(int8x16_t __p0) {
   3716   int8x16_t __ret;
   3717   __ret = (int8x16_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 32);
   3718   return __ret;
   3719 }
   3720 #else
   3721 __ai int8x16_t vclzq_s8(int8x16_t __p0) {
   3722   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3723   int8x16_t __ret;
   3724   __ret = (int8x16_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 32);
   3725   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3726   return __ret;
   3727 }
   3728 #endif
   3729 
   3730 #ifdef __LITTLE_ENDIAN__
   3731 __ai int32x4_t vclzq_s32(int32x4_t __p0) {
   3732   int32x4_t __ret;
   3733   __ret = (int32x4_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 34);
   3734   return __ret;
   3735 }
   3736 #else
   3737 __ai int32x4_t vclzq_s32(int32x4_t __p0) {
   3738   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3739   int32x4_t __ret;
   3740   __ret = (int32x4_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 34);
   3741   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3742   return __ret;
   3743 }
   3744 #endif
   3745 
   3746 #ifdef __LITTLE_ENDIAN__
   3747 __ai int16x8_t vclzq_s16(int16x8_t __p0) {
   3748   int16x8_t __ret;
   3749   __ret = (int16x8_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 33);
   3750   return __ret;
   3751 }
   3752 #else
   3753 __ai int16x8_t vclzq_s16(int16x8_t __p0) {
   3754   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3755   int16x8_t __ret;
   3756   __ret = (int16x8_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 33);
   3757   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3758   return __ret;
   3759 }
   3760 #endif
   3761 
   3762 #ifdef __LITTLE_ENDIAN__
   3763 __ai uint8x8_t vclz_u8(uint8x8_t __p0) {
   3764   uint8x8_t __ret;
   3765   __ret = (uint8x8_t) __builtin_neon_vclz_v((int8x8_t)__p0, 16);
   3766   return __ret;
   3767 }
   3768 #else
   3769 __ai uint8x8_t vclz_u8(uint8x8_t __p0) {
   3770   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3771   uint8x8_t __ret;
   3772   __ret = (uint8x8_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 16);
   3773   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3774   return __ret;
   3775 }
   3776 #endif
   3777 
   3778 #ifdef __LITTLE_ENDIAN__
   3779 __ai uint32x2_t vclz_u32(uint32x2_t __p0) {
   3780   uint32x2_t __ret;
   3781   __ret = (uint32x2_t) __builtin_neon_vclz_v((int8x8_t)__p0, 18);
   3782   return __ret;
   3783 }
   3784 #else
   3785 __ai uint32x2_t vclz_u32(uint32x2_t __p0) {
   3786   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3787   uint32x2_t __ret;
   3788   __ret = (uint32x2_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 18);
   3789   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3790   return __ret;
   3791 }
   3792 #endif
   3793 
   3794 #ifdef __LITTLE_ENDIAN__
   3795 __ai uint16x4_t vclz_u16(uint16x4_t __p0) {
   3796   uint16x4_t __ret;
   3797   __ret = (uint16x4_t) __builtin_neon_vclz_v((int8x8_t)__p0, 17);
   3798   return __ret;
   3799 }
   3800 #else
   3801 __ai uint16x4_t vclz_u16(uint16x4_t __p0) {
   3802   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3803   uint16x4_t __ret;
   3804   __ret = (uint16x4_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 17);
   3805   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3806   return __ret;
   3807 }
   3808 #endif
   3809 
   3810 #ifdef __LITTLE_ENDIAN__
   3811 __ai int8x8_t vclz_s8(int8x8_t __p0) {
   3812   int8x8_t __ret;
   3813   __ret = (int8x8_t) __builtin_neon_vclz_v((int8x8_t)__p0, 0);
   3814   return __ret;
   3815 }
   3816 #else
   3817 __ai int8x8_t vclz_s8(int8x8_t __p0) {
   3818   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3819   int8x8_t __ret;
   3820   __ret = (int8x8_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 0);
   3821   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3822   return __ret;
   3823 }
   3824 #endif
   3825 
   3826 #ifdef __LITTLE_ENDIAN__
   3827 __ai int32x2_t vclz_s32(int32x2_t __p0) {
   3828   int32x2_t __ret;
   3829   __ret = (int32x2_t) __builtin_neon_vclz_v((int8x8_t)__p0, 2);
   3830   return __ret;
   3831 }
   3832 #else
   3833 __ai int32x2_t vclz_s32(int32x2_t __p0) {
   3834   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   3835   int32x2_t __ret;
   3836   __ret = (int32x2_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 2);
   3837   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   3838   return __ret;
   3839 }
   3840 #endif
   3841 
   3842 #ifdef __LITTLE_ENDIAN__
   3843 __ai int16x4_t vclz_s16(int16x4_t __p0) {
   3844   int16x4_t __ret;
   3845   __ret = (int16x4_t) __builtin_neon_vclz_v((int8x8_t)__p0, 1);
   3846   return __ret;
   3847 }
   3848 #else
   3849 __ai int16x4_t vclz_s16(int16x4_t __p0) {
   3850   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3851   int16x4_t __ret;
   3852   __ret = (int16x4_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 1);
   3853   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   3854   return __ret;
   3855 }
   3856 #endif
   3857 
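/* vcnt* -- per-lane population count; only 8-bit lane variants exist (poly8,
 * uint8, int8), matching the NEON VCNT instruction. */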
   3858 #ifdef __LITTLE_ENDIAN__
   3859 __ai poly8x8_t vcnt_p8(poly8x8_t __p0) {
   3860   poly8x8_t __ret;
   3861   __ret = (poly8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 4);
   3862   return __ret;
   3863 }
   3864 #else
   3865 __ai poly8x8_t vcnt_p8(poly8x8_t __p0) {
   3866   poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3867   poly8x8_t __ret;
   3868   __ret = (poly8x8_t) __builtin_neon_vcnt_v((int8x8_t)__rev0, 4);
   3869   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3870   return __ret;
   3871 }
   3872 #endif
   3873 
   3874 #ifdef __LITTLE_ENDIAN__
   3875 __ai poly8x16_t vcntq_p8(poly8x16_t __p0) {
   3876   poly8x16_t __ret;
   3877   __ret = (poly8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 36);
   3878   return __ret;
   3879 }
   3880 #else
   3881 __ai poly8x16_t vcntq_p8(poly8x16_t __p0) {
   3882   poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3883   poly8x16_t __ret;
   3884   __ret = (poly8x16_t) __builtin_neon_vcntq_v((int8x16_t)__rev0, 36);
   3885   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3886   return __ret;
   3887 }
   3888 #endif
   3889 
   3890 #ifdef __LITTLE_ENDIAN__
   3891 __ai uint8x16_t vcntq_u8(uint8x16_t __p0) {
   3892   uint8x16_t __ret;
   3893   __ret = (uint8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 48);
   3894   return __ret;
   3895 }
   3896 #else
   3897 __ai uint8x16_t vcntq_u8(uint8x16_t __p0) {
   3898   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3899   uint8x16_t __ret;
   3900   __ret = (uint8x16_t) __builtin_neon_vcntq_v((int8x16_t)__rev0, 48);
   3901   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3902   return __ret;
   3903 }
   3904 #endif
   3905 
   3906 #ifdef __LITTLE_ENDIAN__
   3907 __ai int8x16_t vcntq_s8(int8x16_t __p0) {
   3908   int8x16_t __ret;
   3909   __ret = (int8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 32);
   3910   return __ret;
   3911 }
   3912 #else
   3913 __ai int8x16_t vcntq_s8(int8x16_t __p0) {
   3914   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3915   int8x16_t __ret;
   3916   __ret = (int8x16_t) __builtin_neon_vcntq_v((int8x16_t)__rev0, 32);
   3917   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3918   return __ret;
   3919 }
   3920 #endif
   3921 
   3922 #ifdef __LITTLE_ENDIAN__
   3923 __ai uint8x8_t vcnt_u8(uint8x8_t __p0) {
   3924   uint8x8_t __ret;
   3925   __ret = (uint8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 16);
   3926   return __ret;
   3927 }
   3928 #else
   3929 __ai uint8x8_t vcnt_u8(uint8x8_t __p0) {
   3930   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3931   uint8x8_t __ret;
   3932   __ret = (uint8x8_t) __builtin_neon_vcnt_v((int8x8_t)__rev0, 16);
   3933   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3934   return __ret;
   3935 }
   3936 #endif
   3937 
   3938 #ifdef __LITTLE_ENDIAN__
   3939 __ai int8x8_t vcnt_s8(int8x8_t __p0) {
   3940   int8x8_t __ret;
   3941   __ret = (int8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 0);
   3942   return __ret;
   3943 }
   3944 #else
   3945 __ai int8x8_t vcnt_s8(int8x8_t __p0) {
   3946   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3947   int8x8_t __ret;
   3948   __ret = (int8x8_t) __builtin_neon_vcnt_v((int8x8_t)__rev0, 0);
   3949   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3950   return __ret;
   3951 }
   3952 #endif
   3953 
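/* vcombine_* -- concatenate two 64-bit vectors into one 128-bit vector, with
 * __p0 in the low half and __p1 in the high half.  The __noswap_* copies are
 * internal helpers for other big-endian wrappers that already hold their data
 * in reversed lane order.  Illustrative sketch (little-endian lane values;
 * vcreate_u32 is defined further below):
 *
 *   uint32x2_t lo = vcreate_u32(0x0000000200000001ULL);  // {1, 2}
 *   uint32x2_t hi = vcreate_u32(0x0000000400000003ULL);  // {3, 4}
 *   uint32x4_t v  = vcombine_u32(lo, hi);                // {1, 2, 3, 4}
 */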
   3954 #ifdef __LITTLE_ENDIAN__
   3955 __ai poly8x16_t vcombine_p8(poly8x8_t __p0, poly8x8_t __p1) {
   3956   poly8x16_t __ret;
   3957   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
   3958   return __ret;
   3959 }
   3960 #else
   3961 __ai poly8x16_t vcombine_p8(poly8x8_t __p0, poly8x8_t __p1) {
   3962   poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3963   poly8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3964   poly8x16_t __ret;
   3965   __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
   3966   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   3967   return __ret;
   3968 }
   3969 #endif
   3970 
   3971 #ifdef __LITTLE_ENDIAN__
   3972 __ai poly16x8_t vcombine_p16(poly16x4_t __p0, poly16x4_t __p1) {
   3973   poly16x8_t __ret;
   3974   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7);
   3975   return __ret;
   3976 }
   3977 #else
   3978 __ai poly16x8_t vcombine_p16(poly16x4_t __p0, poly16x4_t __p1) {
   3979   poly16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   3980   poly16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   3981   poly16x8_t __ret;
   3982   __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7);
   3983   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   3984   return __ret;
   3985 }
   3986 #endif
   3987 
   3988 #ifdef __LITTLE_ENDIAN__
   3989 __ai uint8x16_t vcombine_u8(uint8x8_t __p0, uint8x8_t __p1) {
   3990   uint8x16_t __ret;
   3991   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
   3992   return __ret;
   3993 }
   3994 #else
   3995 __ai uint8x16_t vcombine_u8(uint8x8_t __p0, uint8x8_t __p1) {
   3996   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   3997   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   3998   uint8x16_t __ret;
   3999   __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
   4000   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   4001   return __ret;
   4002 }
   4003 __ai uint8x16_t __noswap_vcombine_u8(uint8x8_t __p0, uint8x8_t __p1) {
   4004   uint8x16_t __ret;
   4005   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
   4006   return __ret;
   4007 }
   4008 #endif
   4009 
   4010 #ifdef __LITTLE_ENDIAN__
   4011 __ai uint32x4_t vcombine_u32(uint32x2_t __p0, uint32x2_t __p1) {
   4012   uint32x4_t __ret;
   4013   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3);
   4014   return __ret;
   4015 }
   4016 #else
   4017 __ai uint32x4_t vcombine_u32(uint32x2_t __p0, uint32x2_t __p1) {
   4018   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   4019   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   4020   uint32x4_t __ret;
   4021   __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3);
   4022   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   4023   return __ret;
   4024 }
   4025 __ai uint32x4_t __noswap_vcombine_u32(uint32x2_t __p0, uint32x2_t __p1) {
   4026   uint32x4_t __ret;
   4027   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3);
   4028   return __ret;
   4029 }
   4030 #endif
   4031 
   4032 #ifdef __LITTLE_ENDIAN__
   4033 __ai uint64x2_t vcombine_u64(uint64x1_t __p0, uint64x1_t __p1) {
   4034   uint64x2_t __ret;
   4035   __ret = __builtin_shufflevector(__p0, __p1, 0, 1);
   4036   return __ret;
   4037 }
   4038 #else
   4039 __ai uint64x2_t vcombine_u64(uint64x1_t __p0, uint64x1_t __p1) {
   4040   uint64x2_t __ret;
   4041   __ret = __builtin_shufflevector(__p0, __p1, 0, 1);
   4042   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   4043   return __ret;
   4044 }
   4045 #endif
   4046 
   4047 #ifdef __LITTLE_ENDIAN__
   4048 __ai uint16x8_t vcombine_u16(uint16x4_t __p0, uint16x4_t __p1) {
   4049   uint16x8_t __ret;
   4050   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7);
   4051   return __ret;
   4052 }
   4053 #else
   4054 __ai uint16x8_t vcombine_u16(uint16x4_t __p0, uint16x4_t __p1) {
   4055   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   4056   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   4057   uint16x8_t __ret;
   4058   __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7);
   4059   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   4060   return __ret;
   4061 }
   4062 __ai uint16x8_t __noswap_vcombine_u16(uint16x4_t __p0, uint16x4_t __p1) {
   4063   uint16x8_t __ret;
   4064   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7);
   4065   return __ret;
   4066 }
   4067 #endif
   4068 
   4069 #ifdef __LITTLE_ENDIAN__
   4070 __ai int8x16_t vcombine_s8(int8x8_t __p0, int8x8_t __p1) {
   4071   int8x16_t __ret;
   4072   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
   4073   return __ret;
   4074 }
   4075 #else
   4076 __ai int8x16_t vcombine_s8(int8x8_t __p0, int8x8_t __p1) {
   4077   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   4078   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   4079   int8x16_t __ret;
   4080   __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
   4081   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   4082   return __ret;
   4083 }
   4084 __ai int8x16_t __noswap_vcombine_s8(int8x8_t __p0, int8x8_t __p1) {
   4085   int8x16_t __ret;
   4086   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
   4087   return __ret;
   4088 }
   4089 #endif
   4090 
   4091 #ifdef __LITTLE_ENDIAN__
   4092 __ai float32x4_t vcombine_f32(float32x2_t __p0, float32x2_t __p1) {
   4093   float32x4_t __ret;
   4094   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3);
   4095   return __ret;
   4096 }
   4097 #else
   4098 __ai float32x4_t vcombine_f32(float32x2_t __p0, float32x2_t __p1) {
   4099   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   4100   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   4101   float32x4_t __ret;
   4102   __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3);
   4103   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   4104   return __ret;
   4105 }
   4106 __ai float32x4_t __noswap_vcombine_f32(float32x2_t __p0, float32x2_t __p1) {
   4107   float32x4_t __ret;
   4108   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3);
   4109   return __ret;
   4110 }
   4111 #endif
   4112 
   4113 #ifdef __LITTLE_ENDIAN__
   4114 __ai float16x8_t vcombine_f16(float16x4_t __p0, float16x4_t __p1) {
   4115   float16x8_t __ret;
   4116   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7);
   4117   return __ret;
   4118 }
   4119 #else
   4120 __ai float16x8_t vcombine_f16(float16x4_t __p0, float16x4_t __p1) {
   4121   float16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   4122   float16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   4123   float16x8_t __ret;
   4124   __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7);
   4125   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   4126   return __ret;
   4127 }
   4128 __ai float16x8_t __noswap_vcombine_f16(float16x4_t __p0, float16x4_t __p1) {
   4129   float16x8_t __ret;
   4130   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7);
   4131   return __ret;
   4132 }
   4133 #endif
   4134 
   4135 #ifdef __LITTLE_ENDIAN__
   4136 __ai int32x4_t vcombine_s32(int32x2_t __p0, int32x2_t __p1) {
   4137   int32x4_t __ret;
   4138   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3);
   4139   return __ret;
   4140 }
   4141 #else
   4142 __ai int32x4_t vcombine_s32(int32x2_t __p0, int32x2_t __p1) {
   4143   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   4144   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   4145   int32x4_t __ret;
   4146   __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3);
   4147   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   4148   return __ret;
   4149 }
   4150 __ai int32x4_t __noswap_vcombine_s32(int32x2_t __p0, int32x2_t __p1) {
   4151   int32x4_t __ret;
   4152   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3);
   4153   return __ret;
   4154 }
   4155 #endif
   4156 
   4157 #ifdef __LITTLE_ENDIAN__
   4158 __ai int64x2_t vcombine_s64(int64x1_t __p0, int64x1_t __p1) {
   4159   int64x2_t __ret;
   4160   __ret = __builtin_shufflevector(__p0, __p1, 0, 1);
   4161   return __ret;
   4162 }
   4163 #else
   4164 __ai int64x2_t vcombine_s64(int64x1_t __p0, int64x1_t __p1) {
   4165   int64x2_t __ret;
   4166   __ret = __builtin_shufflevector(__p0, __p1, 0, 1);
   4167   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   4168   return __ret;
   4169 }
   4170 #endif
   4171 
   4172 #ifdef __LITTLE_ENDIAN__
   4173 __ai int16x8_t vcombine_s16(int16x4_t __p0, int16x4_t __p1) {
   4174   int16x8_t __ret;
   4175   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7);
   4176   return __ret;
   4177 }
   4178 #else
   4179 __ai int16x8_t vcombine_s16(int16x4_t __p0, int16x4_t __p1) {
   4180   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   4181   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   4182   int16x8_t __ret;
   4183   __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7);
   4184   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   4185   return __ret;
   4186 }
   4187 __ai int16x8_t __noswap_vcombine_s16(int16x4_t __p0, int16x4_t __p1) {
   4188   int16x8_t __ret;
   4189   __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7);
   4190   return __ret;
   4191 }
   4192 #endif
   4193 
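/* vcreate_*: reinterpret a 64-bit integer as a 64-bit NEON vector.  This is a
 * plain bit-pattern cast, so no lane swap is needed and the little- and
 * big-endian definitions are intentionally identical. */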
   4194 #ifdef __LITTLE_ENDIAN__
   4195 __ai poly8x8_t vcreate_p8(uint64_t __p0) {
   4196   poly8x8_t __ret;
   4197   __ret = (poly8x8_t)(__p0);
   4198   return __ret;
   4199 }
   4200 #else
   4201 __ai poly8x8_t vcreate_p8(uint64_t __p0) {
   4202   poly8x8_t __ret;
   4203   __ret = (poly8x8_t)(__p0);
   4204   return __ret;
   4205 }
   4206 #endif
   4207 
   4208 #ifdef __LITTLE_ENDIAN__
   4209 __ai poly16x4_t vcreate_p16(uint64_t __p0) {
   4210   poly16x4_t __ret;
   4211   __ret = (poly16x4_t)(__p0);
   4212   return __ret;
   4213 }
   4214 #else
   4215 __ai poly16x4_t vcreate_p16(uint64_t __p0) {
   4216   poly16x4_t __ret;
   4217   __ret = (poly16x4_t)(__p0);
   4218   return __ret;
   4219 }
   4220 #endif
   4221 
   4222 #ifdef __LITTLE_ENDIAN__
   4223 __ai uint8x8_t vcreate_u8(uint64_t __p0) {
   4224   uint8x8_t __ret;
   4225   __ret = (uint8x8_t)(__p0);
   4226   return __ret;
   4227 }
   4228 #else
   4229 __ai uint8x8_t vcreate_u8(uint64_t __p0) {
   4230   uint8x8_t __ret;
   4231   __ret = (uint8x8_t)(__p0);
   4232   return __ret;
   4233 }
   4234 #endif
   4235 
   4236 #ifdef __LITTLE_ENDIAN__
   4237 __ai uint32x2_t vcreate_u32(uint64_t __p0) {
   4238   uint32x2_t __ret;
   4239   __ret = (uint32x2_t)(__p0);
   4240   return __ret;
   4241 }
   4242 #else
   4243 __ai uint32x2_t vcreate_u32(uint64_t __p0) {
   4244   uint32x2_t __ret;
   4245   __ret = (uint32x2_t)(__p0);
   4246   return __ret;
   4247 }
   4248 #endif
   4249 
   4250 #ifdef __LITTLE_ENDIAN__
   4251 __ai uint64x1_t vcreate_u64(uint64_t __p0) {
   4252   uint64x1_t __ret;
   4253   __ret = (uint64x1_t)(__p0);
   4254   return __ret;
   4255 }
   4256 #else
   4257 __ai uint64x1_t vcreate_u64(uint64_t __p0) {
   4258   uint64x1_t __ret;
   4259   __ret = (uint64x1_t)(__p0);
   4260   return __ret;
   4261 }
   4262 #endif
   4263 
   4264 #ifdef __LITTLE_ENDIAN__
   4265 __ai uint16x4_t vcreate_u16(uint64_t __p0) {
   4266   uint16x4_t __ret;
   4267   __ret = (uint16x4_t)(__p0);
   4268   return __ret;
   4269 }
   4270 #else
   4271 __ai uint16x4_t vcreate_u16(uint64_t __p0) {
   4272   uint16x4_t __ret;
   4273   __ret = (uint16x4_t)(__p0);
   4274   return __ret;
   4275 }
   4276 #endif
   4277 
   4278 #ifdef __LITTLE_ENDIAN__
   4279 __ai int8x8_t vcreate_s8(uint64_t __p0) {
   4280   int8x8_t __ret;
   4281   __ret = (int8x8_t)(__p0);
   4282   return __ret;
   4283 }
   4284 #else
   4285 __ai int8x8_t vcreate_s8(uint64_t __p0) {
   4286   int8x8_t __ret;
   4287   __ret = (int8x8_t)(__p0);
   4288   return __ret;
   4289 }
   4290 #endif
   4291 
   4292 #ifdef __LITTLE_ENDIAN__
   4293 __ai float32x2_t vcreate_f32(uint64_t __p0) {
   4294   float32x2_t __ret;
   4295   __ret = (float32x2_t)(__p0);
   4296   return __ret;
   4297 }
   4298 #else
   4299 __ai float32x2_t vcreate_f32(uint64_t __p0) {
   4300   float32x2_t __ret;
   4301   __ret = (float32x2_t)(__p0);
   4302   return __ret;
   4303 }
   4304 #endif
   4305 
   4306 #ifdef __LITTLE_ENDIAN__
   4307 __ai float16x4_t vcreate_f16(uint64_t __p0) {
   4308   float16x4_t __ret;
   4309   __ret = (float16x4_t)(__p0);
   4310   return __ret;
   4311 }
   4312 #else
   4313 __ai float16x4_t vcreate_f16(uint64_t __p0) {
   4314   float16x4_t __ret;
   4315   __ret = (float16x4_t)(__p0);
   4316   return __ret;
   4317 }
   4318 #endif
   4319 
   4320 #ifdef __LITTLE_ENDIAN__
   4321 __ai int32x2_t vcreate_s32(uint64_t __p0) {
   4322   int32x2_t __ret;
   4323   __ret = (int32x2_t)(__p0);
   4324   return __ret;
   4325 }
   4326 #else
   4327 __ai int32x2_t vcreate_s32(uint64_t __p0) {
   4328   int32x2_t __ret;
   4329   __ret = (int32x2_t)(__p0);
   4330   return __ret;
   4331 }
   4332 #endif
   4333 
   4334 #ifdef __LITTLE_ENDIAN__
   4335 __ai int64x1_t vcreate_s64(uint64_t __p0) {
   4336   int64x1_t __ret;
   4337   __ret = (int64x1_t)(__p0);
   4338   return __ret;
   4339 }
   4340 #else
   4341 __ai int64x1_t vcreate_s64(uint64_t __p0) {
   4342   int64x1_t __ret;
   4343   __ret = (int64x1_t)(__p0);
   4344   return __ret;
   4345 }
   4346 #endif
   4347 
   4348 #ifdef __LITTLE_ENDIAN__
   4349 __ai int16x4_t vcreate_s16(uint64_t __p0) {
   4350   int16x4_t __ret;
   4351   __ret = (int16x4_t)(__p0);
   4352   return __ret;
   4353 }
   4354 #else
   4355 __ai int16x4_t vcreate_s16(uint64_t __p0) {
   4356   int16x4_t __ret;
   4357   __ret = (int16x4_t)(__p0);
   4358   return __ret;
   4359 }
   4360 #endif
   4361 
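/* vcvt_*: lane-wise conversions between floating-point and integer vectors,
 * and between float16 and float32 halves.  The trailing integer constant
 * passed to each builtin encodes the operand's NEON element type for the
 * backend. */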
   4362 #ifdef __LITTLE_ENDIAN__
   4363 __ai float16x4_t vcvt_f16_f32(float32x4_t __p0) {
   4364   float16x4_t __ret;
   4365   __ret = (float16x4_t) __builtin_neon_vcvt_f16_v((int8x16_t)__p0, 8);
   4366   return __ret;
   4367 }
   4368 #else
   4369 __ai float16x4_t vcvt_f16_f32(float32x4_t __p0) {
   4370   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   4371   float16x4_t __ret;
   4372   __ret = (float16x4_t) __builtin_neon_vcvt_f16_v((int8x16_t)__rev0, 8);
   4373   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   4374   return __ret;
   4375 }
   4376 __ai float16x4_t __noswap_vcvt_f16_f32(float32x4_t __p0) {
   4377   float16x4_t __ret;
   4378   __ret = (float16x4_t) __builtin_neon_vcvt_f16_v((int8x16_t)__p0, 8);
   4379   return __ret;
   4380 }
   4381 #endif
   4382 
   4383 #ifdef __LITTLE_ENDIAN__
   4384 __ai float32x4_t vcvtq_f32_u32(uint32x4_t __p0) {
   4385   float32x4_t __ret;
   4386   __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__p0, 50);
   4387   return __ret;
   4388 }
   4389 #else
   4390 __ai float32x4_t vcvtq_f32_u32(uint32x4_t __p0) {
   4391   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   4392   float32x4_t __ret;
   4393   __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__rev0, 50);
   4394   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   4395   return __ret;
   4396 }
   4397 #endif
   4398 
   4399 #ifdef __LITTLE_ENDIAN__
   4400 __ai float32x4_t vcvtq_f32_s32(int32x4_t __p0) {
   4401   float32x4_t __ret;
   4402   __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__p0, 34);
   4403   return __ret;
   4404 }
   4405 #else
   4406 __ai float32x4_t vcvtq_f32_s32(int32x4_t __p0) {
   4407   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   4408   float32x4_t __ret;
   4409   __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__rev0, 34);
   4410   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   4411   return __ret;
   4412 }
   4413 #endif
   4414 
   4415 #ifdef __LITTLE_ENDIAN__
   4416 __ai float32x2_t vcvt_f32_u32(uint32x2_t __p0) {
   4417   float32x2_t __ret;
   4418   __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__p0, 18);
   4419   return __ret;
   4420 }
   4421 #else
   4422 __ai float32x2_t vcvt_f32_u32(uint32x2_t __p0) {
   4423   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   4424   float32x2_t __ret;
   4425   __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__rev0, 18);
   4426   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   4427   return __ret;
   4428 }
   4429 #endif
   4430 
   4431 #ifdef __LITTLE_ENDIAN__
   4432 __ai float32x2_t vcvt_f32_s32(int32x2_t __p0) {
   4433   float32x2_t __ret;
   4434   __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__p0, 2);
   4435   return __ret;
   4436 }
   4437 #else
   4438 __ai float32x2_t vcvt_f32_s32(int32x2_t __p0) {
   4439   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   4440   float32x2_t __ret;
   4441   __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__rev0, 2);
   4442   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   4443   return __ret;
   4444 }
   4445 #endif
   4446 
   4447 #ifdef __LITTLE_ENDIAN__
   4448 __ai float32x4_t vcvt_f32_f16(float16x4_t __p0) {
   4449   float32x4_t __ret;
   4450   __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 8);
   4451   return __ret;
   4452 }
   4453 #else
   4454 __ai float32x4_t vcvt_f32_f16(float16x4_t __p0) {
   4455   float16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   4456   float32x4_t __ret;
   4457   __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__rev0, 8);
   4458   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   4459   return __ret;
   4460 }
   4461 __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) {
   4462   float32x4_t __ret;
   4463   __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 8);
   4464   return __ret;
   4465 }
   4466 #endif
   4467 
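/* vcvt{q}_n_*: fixed-point conversions.  __p1 is the number of fractional
 * bits and must be a compile-time constant, hence the macro form rather than
 * an inline function. */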
   4468 #ifdef __LITTLE_ENDIAN__
   4469 #define vcvtq_n_f32_u32(__p0, __p1) __extension__ ({ \
   4470   uint32x4_t __s0 = __p0; \
   4471   float32x4_t __ret; \
   4472   __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__s0, __p1, 50); \
   4473   __ret; \
   4474 })
   4475 #else
   4476 #define vcvtq_n_f32_u32(__p0, __p1) __extension__ ({ \
   4477   uint32x4_t __s0 = __p0; \
   4478   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   4479   float32x4_t __ret; \
   4480   __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__rev0, __p1, 50); \
   4481   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   4482   __ret; \
   4483 })
   4484 #endif
   4485 
   4486 #ifdef __LITTLE_ENDIAN__
   4487 #define vcvtq_n_f32_s32(__p0, __p1) __extension__ ({ \
   4488   int32x4_t __s0 = __p0; \
   4489   float32x4_t __ret; \
   4490   __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__s0, __p1, 34); \
   4491   __ret; \
   4492 })
   4493 #else
   4494 #define vcvtq_n_f32_s32(__p0, __p1) __extension__ ({ \
   4495   int32x4_t __s0 = __p0; \
   4496   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   4497   float32x4_t __ret; \
   4498   __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__rev0, __p1, 34); \
   4499   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   4500   __ret; \
   4501 })
   4502 #endif
   4503 
   4504 #ifdef __LITTLE_ENDIAN__
   4505 #define vcvt_n_f32_u32(__p0, __p1) __extension__ ({ \
   4506   uint32x2_t __s0 = __p0; \
   4507   float32x2_t __ret; \
   4508   __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__s0, __p1, 18); \
   4509   __ret; \
   4510 })
   4511 #else
   4512 #define vcvt_n_f32_u32(__p0, __p1) __extension__ ({ \
   4513   uint32x2_t __s0 = __p0; \
   4514   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   4515   float32x2_t __ret; \
   4516   __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__rev0, __p1, 18); \
   4517   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   4518   __ret; \
   4519 })
   4520 #endif
   4521 
   4522 #ifdef __LITTLE_ENDIAN__
   4523 #define vcvt_n_f32_s32(__p0, __p1) __extension__ ({ \
   4524   int32x2_t __s0 = __p0; \
   4525   float32x2_t __ret; \
   4526   __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__s0, __p1, 2); \
   4527   __ret; \
   4528 })
   4529 #else
   4530 #define vcvt_n_f32_s32(__p0, __p1) __extension__ ({ \
   4531   int32x2_t __s0 = __p0; \
   4532   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   4533   float32x2_t __ret; \
   4534   __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__rev0, __p1, 2); \
   4535   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   4536   __ret; \
   4537 })
   4538 #endif
   4539 
   4540 #ifdef __LITTLE_ENDIAN__
   4541 #define vcvtq_n_s32_f32(__p0, __p1) __extension__ ({ \
   4542   float32x4_t __s0 = __p0; \
   4543   int32x4_t __ret; \
   4544   __ret = (int32x4_t) __builtin_neon_vcvtq_n_s32_v((int8x16_t)__s0, __p1, 34); \
   4545   __ret; \
   4546 })
   4547 #else
   4548 #define vcvtq_n_s32_f32(__p0, __p1) __extension__ ({ \
   4549   float32x4_t __s0 = __p0; \
   4550   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   4551   int32x4_t __ret; \
   4552   __ret = (int32x4_t) __builtin_neon_vcvtq_n_s32_v((int8x16_t)__rev0, __p1, 34); \
   4553   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   4554   __ret; \
   4555 })
   4556 #endif
   4557 
   4558 #ifdef __LITTLE_ENDIAN__
   4559 #define vcvt_n_s32_f32(__p0, __p1) __extension__ ({ \
   4560   float32x2_t __s0 = __p0; \
   4561   int32x2_t __ret; \
   4562   __ret = (int32x2_t) __builtin_neon_vcvt_n_s32_v((int8x8_t)__s0, __p1, 2); \
   4563   __ret; \
   4564 })
   4565 #else
   4566 #define vcvt_n_s32_f32(__p0, __p1) __extension__ ({ \
   4567   float32x2_t __s0 = __p0; \
   4568   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   4569   int32x2_t __ret; \
   4570   __ret = (int32x2_t) __builtin_neon_vcvt_n_s32_v((int8x8_t)__rev0, __p1, 2); \
   4571   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   4572   __ret; \
   4573 })
   4574 #endif
   4575 
   4576 #ifdef __LITTLE_ENDIAN__
   4577 #define vcvtq_n_u32_f32(__p0, __p1) __extension__ ({ \
   4578   float32x4_t __s0 = __p0; \
   4579   uint32x4_t __ret; \
   4580   __ret = (uint32x4_t) __builtin_neon_vcvtq_n_u32_v((int8x16_t)__s0, __p1, 50); \
   4581   __ret; \
   4582 })
   4583 #else
   4584 #define vcvtq_n_u32_f32(__p0, __p1) __extension__ ({ \
   4585   float32x4_t __s0 = __p0; \
   4586   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   4587   uint32x4_t __ret; \
   4588   __ret = (uint32x4_t) __builtin_neon_vcvtq_n_u32_v((int8x16_t)__rev0, __p1, 50); \
   4589   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   4590   __ret; \
   4591 })
   4592 #endif
   4593 
   4594 #ifdef __LITTLE_ENDIAN__
   4595 #define vcvt_n_u32_f32(__p0, __p1) __extension__ ({ \
   4596   float32x2_t __s0 = __p0; \
   4597   uint32x2_t __ret; \
   4598   __ret = (uint32x2_t) __builtin_neon_vcvt_n_u32_v((int8x8_t)__s0, __p1, 18); \
   4599   __ret; \
   4600 })
   4601 #else
   4602 #define vcvt_n_u32_f32(__p0, __p1) __extension__ ({ \
   4603   float32x2_t __s0 = __p0; \
   4604   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   4605   uint32x2_t __ret; \
   4606   __ret = (uint32x2_t) __builtin_neon_vcvt_n_u32_v((int8x8_t)__rev0, __p1, 18); \
   4607   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   4608   __ret; \
   4609 })
   4610 #endif
   4611 
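/* vcvt{q}_{s32,u32}_f32: float-to-integer conversions, rounding toward
 * zero. */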
   4612 #ifdef __LITTLE_ENDIAN__
   4613 __ai int32x4_t vcvtq_s32_f32(float32x4_t __p0) {
   4614   int32x4_t __ret;
   4615   __ret = (int32x4_t) __builtin_neon_vcvtq_s32_v((int8x16_t)__p0, 34);
   4616   return __ret;
   4617 }
   4618 #else
   4619 __ai int32x4_t vcvtq_s32_f32(float32x4_t __p0) {
   4620   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   4621   int32x4_t __ret;
   4622   __ret = (int32x4_t) __builtin_neon_vcvtq_s32_v((int8x16_t)__rev0, 34);
   4623   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   4624   return __ret;
   4625 }
   4626 #endif
   4627 
   4628 #ifdef __LITTLE_ENDIAN__
   4629 __ai int32x2_t vcvt_s32_f32(float32x2_t __p0) {
   4630   int32x2_t __ret;
   4631   __ret = (int32x2_t) __builtin_neon_vcvt_s32_v((int8x8_t)__p0, 2);
   4632   return __ret;
   4633 }
   4634 #else
   4635 __ai int32x2_t vcvt_s32_f32(float32x2_t __p0) {
   4636   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   4637   int32x2_t __ret;
   4638   __ret = (int32x2_t) __builtin_neon_vcvt_s32_v((int8x8_t)__rev0, 2);
   4639   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   4640   return __ret;
   4641 }
   4642 #endif
   4643 
   4644 #ifdef __LITTLE_ENDIAN__
   4645 __ai uint32x4_t vcvtq_u32_f32(float32x4_t __p0) {
   4646   uint32x4_t __ret;
   4647   __ret = (uint32x4_t) __builtin_neon_vcvtq_u32_v((int8x16_t)__p0, 50);
   4648   return __ret;
   4649 }
   4650 #else
   4651 __ai uint32x4_t vcvtq_u32_f32(float32x4_t __p0) {
   4652   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   4653   uint32x4_t __ret;
   4654   __ret = (uint32x4_t) __builtin_neon_vcvtq_u32_v((int8x16_t)__rev0, 50);
   4655   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   4656   return __ret;
   4657 }
   4658 #endif
   4659 
   4660 #ifdef __LITTLE_ENDIAN__
   4661 __ai uint32x2_t vcvt_u32_f32(float32x2_t __p0) {
   4662   uint32x2_t __ret;
   4663   __ret = (uint32x2_t) __builtin_neon_vcvt_u32_v((int8x8_t)__p0, 18);
   4664   return __ret;
   4665 }
   4666 #else
   4667 __ai uint32x2_t vcvt_u32_f32(float32x2_t __p0) {
   4668   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   4669   uint32x2_t __ret;
   4670   __ret = (uint32x2_t) __builtin_neon_vcvt_u32_v((int8x8_t)__rev0, 18);
   4671   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   4672   return __ret;
   4673 }
   4674 #endif
   4675 
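/* vdup{q}_lane_*: broadcast lane __p1 of a 64-bit source vector into every
 * lane of the result.  __p1 must be a constant lane index, hence the macro
 * form. */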
   4676 #ifdef __LITTLE_ENDIAN__
   4677 #define vdup_lane_p8(__p0, __p1) __extension__ ({ \
   4678   poly8x8_t __s0 = __p0; \
   4679   poly8x8_t __ret; \
   4680   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4681   __ret; \
   4682 })
   4683 #else
   4684 #define vdup_lane_p8(__p0, __p1) __extension__ ({ \
   4685   poly8x8_t __s0 = __p0; \
   4686   poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   4687   poly8x8_t __ret; \
   4688   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4689   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   4690   __ret; \
   4691 })
   4692 #endif
   4693 
   4694 #ifdef __LITTLE_ENDIAN__
   4695 #define vdup_lane_p16(__p0, __p1) __extension__ ({ \
   4696   poly16x4_t __s0 = __p0; \
   4697   poly16x4_t __ret; \
   4698   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \
   4699   __ret; \
   4700 })
   4701 #else
   4702 #define vdup_lane_p16(__p0, __p1) __extension__ ({ \
   4703   poly16x4_t __s0 = __p0; \
   4704   poly16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   4705   poly16x4_t __ret; \
   4706   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \
   4707   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   4708   __ret; \
   4709 })
   4710 #endif
   4711 
   4712 #ifdef __LITTLE_ENDIAN__
   4713 #define vdupq_lane_p8(__p0, __p1) __extension__ ({ \
   4714   poly8x8_t __s0 = __p0; \
   4715   poly8x16_t __ret; \
   4716   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4717   __ret; \
   4718 })
   4719 #else
   4720 #define vdupq_lane_p8(__p0, __p1) __extension__ ({ \
   4721   poly8x8_t __s0 = __p0; \
   4722   poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   4723   poly8x16_t __ret; \
   4724   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4725   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   4726   __ret; \
   4727 })
   4728 #endif
   4729 
   4730 #ifdef __LITTLE_ENDIAN__
   4731 #define vdupq_lane_p16(__p0, __p1) __extension__ ({ \
   4732   poly16x4_t __s0 = __p0; \
   4733   poly16x8_t __ret; \
   4734   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4735   __ret; \
   4736 })
   4737 #else
   4738 #define vdupq_lane_p16(__p0, __p1) __extension__ ({ \
   4739   poly16x4_t __s0 = __p0; \
   4740   poly16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   4741   poly16x8_t __ret; \
   4742   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4743   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   4744   __ret; \
   4745 })
   4746 #endif
   4747 
   4748 #ifdef __LITTLE_ENDIAN__
   4749 #define vdupq_lane_u8(__p0, __p1) __extension__ ({ \
   4750   uint8x8_t __s0 = __p0; \
   4751   uint8x16_t __ret; \
   4752   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4753   __ret; \
   4754 })
   4755 #else
   4756 #define vdupq_lane_u8(__p0, __p1) __extension__ ({ \
   4757   uint8x8_t __s0 = __p0; \
   4758   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   4759   uint8x16_t __ret; \
   4760   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4761   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   4762   __ret; \
   4763 })
   4764 #endif
   4765 
   4766 #ifdef __LITTLE_ENDIAN__
   4767 #define vdupq_lane_u32(__p0, __p1) __extension__ ({ \
   4768   uint32x2_t __s0 = __p0; \
   4769   uint32x4_t __ret; \
   4770   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \
   4771   __ret; \
   4772 })
   4773 #else
   4774 #define vdupq_lane_u32(__p0, __p1) __extension__ ({ \
   4775   uint32x2_t __s0 = __p0; \
   4776   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   4777   uint32x4_t __ret; \
   4778   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \
   4779   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   4780   __ret; \
   4781 })
   4782 #endif
   4783 
   4784 #ifdef __LITTLE_ENDIAN__
   4785 #define vdupq_lane_u64(__p0, __p1) __extension__ ({ \
   4786   uint64x1_t __s0 = __p0; \
   4787   uint64x2_t __ret; \
   4788   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \
   4789   __ret; \
   4790 })
   4791 #else
   4792 #define vdupq_lane_u64(__p0, __p1) __extension__ ({ \
   4793   uint64x1_t __s0 = __p0; \
   4794   uint64x2_t __ret; \
   4795   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \
   4796   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   4797   __ret; \
   4798 })
   4799 #endif
   4800 
   4801 #ifdef __LITTLE_ENDIAN__
   4802 #define vdupq_lane_u16(__p0, __p1) __extension__ ({ \
   4803   uint16x4_t __s0 = __p0; \
   4804   uint16x8_t __ret; \
   4805   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4806   __ret; \
   4807 })
   4808 #else
   4809 #define vdupq_lane_u16(__p0, __p1) __extension__ ({ \
   4810   uint16x4_t __s0 = __p0; \
   4811   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   4812   uint16x8_t __ret; \
   4813   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4814   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   4815   __ret; \
   4816 })
   4817 #endif
   4818 
   4819 #ifdef __LITTLE_ENDIAN__
   4820 #define vdupq_lane_s8(__p0, __p1) __extension__ ({ \
   4821   int8x8_t __s0 = __p0; \
   4822   int8x16_t __ret; \
   4823   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4824   __ret; \
   4825 })
   4826 #else
   4827 #define vdupq_lane_s8(__p0, __p1) __extension__ ({ \
   4828   int8x8_t __s0 = __p0; \
   4829   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   4830   int8x16_t __ret; \
   4831   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4832   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   4833   __ret; \
   4834 })
   4835 #endif
   4836 
   4837 #ifdef __LITTLE_ENDIAN__
   4838 #define vdupq_lane_f32(__p0, __p1) __extension__ ({ \
   4839   float32x2_t __s0 = __p0; \
   4840   float32x4_t __ret; \
   4841   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \
   4842   __ret; \
   4843 })
   4844 #else
   4845 #define vdupq_lane_f32(__p0, __p1) __extension__ ({ \
   4846   float32x2_t __s0 = __p0; \
   4847   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   4848   float32x4_t __ret; \
   4849   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \
   4850   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   4851   __ret; \
   4852 })
   4853 #endif
   4854 
   4855 #ifdef __LITTLE_ENDIAN__
   4856 #define vdupq_lane_s32(__p0, __p1) __extension__ ({ \
   4857   int32x2_t __s0 = __p0; \
   4858   int32x4_t __ret; \
   4859   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \
   4860   __ret; \
   4861 })
   4862 #else
   4863 #define vdupq_lane_s32(__p0, __p1) __extension__ ({ \
   4864   int32x2_t __s0 = __p0; \
   4865   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   4866   int32x4_t __ret; \
   4867   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \
   4868   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   4869   __ret; \
   4870 })
   4871 #endif
   4872 
   4873 #ifdef __LITTLE_ENDIAN__
   4874 #define vdupq_lane_s64(__p0, __p1) __extension__ ({ \
   4875   int64x1_t __s0 = __p0; \
   4876   int64x2_t __ret; \
   4877   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \
   4878   __ret; \
   4879 })
   4880 #else
   4881 #define vdupq_lane_s64(__p0, __p1) __extension__ ({ \
   4882   int64x1_t __s0 = __p0; \
   4883   int64x2_t __ret; \
   4884   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \
   4885   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   4886   __ret; \
   4887 })
   4888 #endif
   4889 
   4890 #ifdef __LITTLE_ENDIAN__
   4891 #define vdupq_lane_s16(__p0, __p1) __extension__ ({ \
   4892   int16x4_t __s0 = __p0; \
   4893   int16x8_t __ret; \
   4894   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4895   __ret; \
   4896 })
   4897 #else
   4898 #define vdupq_lane_s16(__p0, __p1) __extension__ ({ \
   4899   int16x4_t __s0 = __p0; \
   4900   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   4901   int16x8_t __ret; \
   4902   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4903   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   4904   __ret; \
   4905 })
   4906 #endif
   4907 
   4908 #ifdef __LITTLE_ENDIAN__
   4909 #define vdup_lane_u8(__p0, __p1) __extension__ ({ \
   4910   uint8x8_t __s0 = __p0; \
   4911   uint8x8_t __ret; \
   4912   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4913   __ret; \
   4914 })
   4915 #else
   4916 #define vdup_lane_u8(__p0, __p1) __extension__ ({ \
   4917   uint8x8_t __s0 = __p0; \
   4918   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   4919   uint8x8_t __ret; \
   4920   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4921   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   4922   __ret; \
   4923 })
   4924 #endif
   4925 
   4926 #ifdef __LITTLE_ENDIAN__
   4927 #define vdup_lane_u32(__p0, __p1) __extension__ ({ \
   4928   uint32x2_t __s0 = __p0; \
   4929   uint32x2_t __ret; \
   4930   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \
   4931   __ret; \
   4932 })
   4933 #else
   4934 #define vdup_lane_u32(__p0, __p1) __extension__ ({ \
   4935   uint32x2_t __s0 = __p0; \
   4936   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   4937   uint32x2_t __ret; \
   4938   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \
   4939   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   4940   __ret; \
   4941 })
   4942 #endif
   4943 
   4944 #ifdef __LITTLE_ENDIAN__
   4945 #define vdup_lane_u64(__p0, __p1) __extension__ ({ \
   4946   uint64x1_t __s0 = __p0; \
   4947   uint64x1_t __ret; \
   4948   __ret = __builtin_shufflevector(__s0, __s0, __p1); \
   4949   __ret; \
   4950 })
   4951 #else
   4952 #define vdup_lane_u64(__p0, __p1) __extension__ ({ \
   4953   uint64x1_t __s0 = __p0; \
   4954   uint64x1_t __ret; \
   4955   __ret = __builtin_shufflevector(__s0, __s0, __p1); \
   4956   __ret; \
   4957 })
   4958 #endif
   4959 
   4960 #ifdef __LITTLE_ENDIAN__
   4961 #define vdup_lane_u16(__p0, __p1) __extension__ ({ \
   4962   uint16x4_t __s0 = __p0; \
   4963   uint16x4_t __ret; \
   4964   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \
   4965   __ret; \
   4966 })
   4967 #else
   4968 #define vdup_lane_u16(__p0, __p1) __extension__ ({ \
   4969   uint16x4_t __s0 = __p0; \
   4970   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   4971   uint16x4_t __ret; \
   4972   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \
   4973   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   4974   __ret; \
   4975 })
   4976 #endif
   4977 
   4978 #ifdef __LITTLE_ENDIAN__
   4979 #define vdup_lane_s8(__p0, __p1) __extension__ ({ \
   4980   int8x8_t __s0 = __p0; \
   4981   int8x8_t __ret; \
   4982   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4983   __ret; \
   4984 })
   4985 #else
   4986 #define vdup_lane_s8(__p0, __p1) __extension__ ({ \
   4987   int8x8_t __s0 = __p0; \
   4988   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   4989   int8x8_t __ret; \
   4990   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1); \
   4991   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   4992   __ret; \
   4993 })
   4994 #endif
   4995 
   4996 #ifdef __LITTLE_ENDIAN__
   4997 #define vdup_lane_f32(__p0, __p1) __extension__ ({ \
   4998   float32x2_t __s0 = __p0; \
   4999   float32x2_t __ret; \
   5000   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \
   5001   __ret; \
   5002 })
   5003 #else
   5004 #define vdup_lane_f32(__p0, __p1) __extension__ ({ \
   5005   float32x2_t __s0 = __p0; \
   5006   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   5007   float32x2_t __ret; \
   5008   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \
   5009   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   5010   __ret; \
   5011 })
   5012 #endif
   5013 
   5014 #ifdef __LITTLE_ENDIAN__
   5015 #define vdup_lane_s32(__p0, __p1) __extension__ ({ \
   5016   int32x2_t __s0 = __p0; \
   5017   int32x2_t __ret; \
   5018   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1); \
   5019   __ret; \
   5020 })
   5021 #else
   5022 #define vdup_lane_s32(__p0, __p1) __extension__ ({ \
   5023   int32x2_t __s0 = __p0; \
   5024   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   5025   int32x2_t __ret; \
   5026   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1); \
   5027   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   5028   __ret; \
   5029 })
   5030 #endif
   5031 
   5032 #ifdef __LITTLE_ENDIAN__
   5033 #define vdup_lane_s64(__p0, __p1) __extension__ ({ \
   5034   int64x1_t __s0 = __p0; \
   5035   int64x1_t __ret; \
   5036   __ret = __builtin_shufflevector(__s0, __s0, __p1); \
   5037   __ret; \
   5038 })
   5039 #else
   5040 #define vdup_lane_s64(__p0, __p1) __extension__ ({ \
   5041   int64x1_t __s0 = __p0; \
   5042   int64x1_t __ret; \
   5043   __ret = __builtin_shufflevector(__s0, __s0, __p1); \
   5044   __ret; \
   5045 })
   5046 #endif
   5047 
   5048 #ifdef __LITTLE_ENDIAN__
   5049 #define vdup_lane_s16(__p0, __p1) __extension__ ({ \
   5050   int16x4_t __s0 = __p0; \
   5051   int16x4_t __ret; \
   5052   __ret = __builtin_shufflevector(__s0, __s0, __p1, __p1, __p1, __p1); \
   5053   __ret; \
   5054 })
   5055 #else
   5056 #define vdup_lane_s16(__p0, __p1) __extension__ ({ \
   5057   int16x4_t __s0 = __p0; \
   5058   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   5059   int16x4_t __ret; \
   5060   __ret = __builtin_shufflevector(__rev0, __rev0, __p1, __p1, __p1, __p1); \
   5061   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   5062   __ret; \
   5063 })
   5064 #endif
   5065 
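/* vdup{q}_n_*: broadcast a scalar value into every lane of the result. */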
   5066 #ifdef __LITTLE_ENDIAN__
   5067 __ai poly8x8_t vdup_n_p8(poly8_t __p0) {
   5068   poly8x8_t __ret;
   5069   __ret = (poly8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5070   return __ret;
   5071 }
   5072 #else
   5073 __ai poly8x8_t vdup_n_p8(poly8_t __p0) {
   5074   poly8x8_t __ret;
   5075   __ret = (poly8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5076   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   5077   return __ret;
   5078 }
   5079 #endif
   5080 
   5081 #ifdef __LITTLE_ENDIAN__
   5082 __ai poly16x4_t vdup_n_p16(poly16_t __p0) {
   5083   poly16x4_t __ret;
   5084   __ret = (poly16x4_t) {__p0, __p0, __p0, __p0};
   5085   return __ret;
   5086 }
   5087 #else
   5088 __ai poly16x4_t vdup_n_p16(poly16_t __p0) {
   5089   poly16x4_t __ret;
   5090   __ret = (poly16x4_t) {__p0, __p0, __p0, __p0};
   5091   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   5092   return __ret;
   5093 }
   5094 #endif
   5095 
   5096 #ifdef __LITTLE_ENDIAN__
   5097 __ai poly8x16_t vdupq_n_p8(poly8_t __p0) {
   5098   poly8x16_t __ret;
   5099   __ret = (poly8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5100   return __ret;
   5101 }
   5102 #else
   5103 __ai poly8x16_t vdupq_n_p8(poly8_t __p0) {
   5104   poly8x16_t __ret;
   5105   __ret = (poly8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5106   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   5107   return __ret;
   5108 }
   5109 #endif
   5110 
   5111 #ifdef __LITTLE_ENDIAN__
   5112 __ai poly16x8_t vdupq_n_p16(poly16_t __p0) {
   5113   poly16x8_t __ret;
   5114   __ret = (poly16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5115   return __ret;
   5116 }
   5117 #else
   5118 __ai poly16x8_t vdupq_n_p16(poly16_t __p0) {
   5119   poly16x8_t __ret;
   5120   __ret = (poly16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5121   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   5122   return __ret;
   5123 }
   5124 #endif
   5125 
   5126 #ifdef __LITTLE_ENDIAN__
   5127 __ai uint8x16_t vdupq_n_u8(uint8_t __p0) {
   5128   uint8x16_t __ret;
   5129   __ret = (uint8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5130   return __ret;
   5131 }
   5132 #else
   5133 __ai uint8x16_t vdupq_n_u8(uint8_t __p0) {
   5134   uint8x16_t __ret;
   5135   __ret = (uint8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5136   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   5137   return __ret;
   5138 }
   5139 #endif
   5140 
   5141 #ifdef __LITTLE_ENDIAN__
   5142 __ai uint32x4_t vdupq_n_u32(uint32_t __p0) {
   5143   uint32x4_t __ret;
   5144   __ret = (uint32x4_t) {__p0, __p0, __p0, __p0};
   5145   return __ret;
   5146 }
   5147 #else
   5148 __ai uint32x4_t vdupq_n_u32(uint32_t __p0) {
   5149   uint32x4_t __ret;
   5150   __ret = (uint32x4_t) {__p0, __p0, __p0, __p0};
   5151   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   5152   return __ret;
   5153 }
   5154 #endif
   5155 
   5156 #ifdef __LITTLE_ENDIAN__
   5157 __ai uint64x2_t vdupq_n_u64(uint64_t __p0) {
   5158   uint64x2_t __ret;
   5159   __ret = (uint64x2_t) {__p0, __p0};
   5160   return __ret;
   5161 }
   5162 #else
   5163 __ai uint64x2_t vdupq_n_u64(uint64_t __p0) {
   5164   uint64x2_t __ret;
   5165   __ret = (uint64x2_t) {__p0, __p0};
   5166   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   5167   return __ret;
   5168 }
   5169 #endif
   5170 
   5171 #ifdef __LITTLE_ENDIAN__
   5172 __ai uint16x8_t vdupq_n_u16(uint16_t __p0) {
   5173   uint16x8_t __ret;
   5174   __ret = (uint16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5175   return __ret;
   5176 }
   5177 #else
   5178 __ai uint16x8_t vdupq_n_u16(uint16_t __p0) {
   5179   uint16x8_t __ret;
   5180   __ret = (uint16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5181   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   5182   return __ret;
   5183 }
   5184 #endif
   5185 
   5186 #ifdef __LITTLE_ENDIAN__
   5187 __ai int8x16_t vdupq_n_s8(int8_t __p0) {
   5188   int8x16_t __ret;
   5189   __ret = (int8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5190   return __ret;
   5191 }
   5192 #else
   5193 __ai int8x16_t vdupq_n_s8(int8_t __p0) {
   5194   int8x16_t __ret;
   5195   __ret = (int8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5196   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   5197   return __ret;
   5198 }
   5199 #endif
   5200 
   5201 #ifdef __LITTLE_ENDIAN__
   5202 __ai float32x4_t vdupq_n_f32(float32_t __p0) {
   5203   float32x4_t __ret;
   5204   __ret = (float32x4_t) {__p0, __p0, __p0, __p0};
   5205   return __ret;
   5206 }
   5207 #else
   5208 __ai float32x4_t vdupq_n_f32(float32_t __p0) {
   5209   float32x4_t __ret;
   5210   __ret = (float32x4_t) {__p0, __p0, __p0, __p0};
   5211   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   5212   return __ret;
   5213 }
   5214 #endif
   5215 
   5216 #ifdef __LITTLE_ENDIAN__
   5217 #define vdupq_n_f16(__p0) __extension__ ({ \
   5218   float16_t __s0 = __p0; \
   5219   float16x8_t __ret; \
   5220   __ret = (float16x8_t) {__s0, __s0, __s0, __s0, __s0, __s0, __s0, __s0}; \
   5221   __ret; \
   5222 })
   5223 #else
   5224 #define vdupq_n_f16(__p0) __extension__ ({ \
   5225   float16_t __s0 = __p0; \
   5226   float16x8_t __ret; \
   5227   __ret = (float16x8_t) {__s0, __s0, __s0, __s0, __s0, __s0, __s0, __s0}; \
   5228   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   5229   __ret; \
   5230 })
   5231 #endif
   5232 
   5233 #ifdef __LITTLE_ENDIAN__
   5234 __ai int32x4_t vdupq_n_s32(int32_t __p0) {
   5235   int32x4_t __ret;
   5236   __ret = (int32x4_t) {__p0, __p0, __p0, __p0};
   5237   return __ret;
   5238 }
   5239 #else
   5240 __ai int32x4_t vdupq_n_s32(int32_t __p0) {
   5241   int32x4_t __ret;
   5242   __ret = (int32x4_t) {__p0, __p0, __p0, __p0};
   5243   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   5244   return __ret;
   5245 }
   5246 #endif
   5247 
   5248 #ifdef __LITTLE_ENDIAN__
   5249 __ai int64x2_t vdupq_n_s64(int64_t __p0) {
   5250   int64x2_t __ret;
   5251   __ret = (int64x2_t) {__p0, __p0};
   5252   return __ret;
   5253 }
   5254 #else
   5255 __ai int64x2_t vdupq_n_s64(int64_t __p0) {
   5256   int64x2_t __ret;
   5257   __ret = (int64x2_t) {__p0, __p0};
   5258   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   5259   return __ret;
   5260 }
   5261 #endif
   5262 
   5263 #ifdef __LITTLE_ENDIAN__
   5264 __ai int16x8_t vdupq_n_s16(int16_t __p0) {
   5265   int16x8_t __ret;
   5266   __ret = (int16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5267   return __ret;
   5268 }
   5269 #else
   5270 __ai int16x8_t vdupq_n_s16(int16_t __p0) {
   5271   int16x8_t __ret;
   5272   __ret = (int16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5273   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   5274   return __ret;
   5275 }
   5276 #endif
   5277 
   5278 #ifdef __LITTLE_ENDIAN__
   5279 __ai uint8x8_t vdup_n_u8(uint8_t __p0) {
   5280   uint8x8_t __ret;
   5281   __ret = (uint8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5282   return __ret;
   5283 }
   5284 #else
   5285 __ai uint8x8_t vdup_n_u8(uint8_t __p0) {
   5286   uint8x8_t __ret;
   5287   __ret = (uint8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5288   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   5289   return __ret;
   5290 }
   5291 #endif
   5292 
   5293 #ifdef __LITTLE_ENDIAN__
   5294 __ai uint32x2_t vdup_n_u32(uint32_t __p0) {
   5295   uint32x2_t __ret;
   5296   __ret = (uint32x2_t) {__p0, __p0};
   5297   return __ret;
   5298 }
   5299 #else
   5300 __ai uint32x2_t vdup_n_u32(uint32_t __p0) {
   5301   uint32x2_t __ret;
   5302   __ret = (uint32x2_t) {__p0, __p0};
   5303   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   5304   return __ret;
   5305 }
   5306 #endif
   5307 
   5308 #ifdef __LITTLE_ENDIAN__
   5309 __ai uint64x1_t vdup_n_u64(uint64_t __p0) {
   5310   uint64x1_t __ret;
   5311   __ret = (uint64x1_t) {__p0};
   5312   return __ret;
   5313 }
   5314 #else
   5315 __ai uint64x1_t vdup_n_u64(uint64_t __p0) {
   5316   uint64x1_t __ret;
   5317   __ret = (uint64x1_t) {__p0};
   5318   return __ret;
   5319 }
   5320 #endif
   5321 
   5322 #ifdef __LITTLE_ENDIAN__
   5323 __ai uint16x4_t vdup_n_u16(uint16_t __p0) {
   5324   uint16x4_t __ret;
   5325   __ret = (uint16x4_t) {__p0, __p0, __p0, __p0};
   5326   return __ret;
   5327 }
   5328 #else
   5329 __ai uint16x4_t vdup_n_u16(uint16_t __p0) {
   5330   uint16x4_t __ret;
   5331   __ret = (uint16x4_t) {__p0, __p0, __p0, __p0};
   5332   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   5333   return __ret;
   5334 }
   5335 #endif
   5336 
   5337 #ifdef __LITTLE_ENDIAN__
   5338 __ai int8x8_t vdup_n_s8(int8_t __p0) {
   5339   int8x8_t __ret;
   5340   __ret = (int8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5341   return __ret;
   5342 }
   5343 #else
   5344 __ai int8x8_t vdup_n_s8(int8_t __p0) {
   5345   int8x8_t __ret;
   5346   __ret = (int8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0};
   5347   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   5348   return __ret;
   5349 }
   5350 #endif
   5351 
   5352 #ifdef __LITTLE_ENDIAN__
   5353 __ai float32x2_t vdup_n_f32(float32_t __p0) {
   5354   float32x2_t __ret;
   5355   __ret = (float32x2_t) {__p0, __p0};
   5356   return __ret;
   5357 }
   5358 #else
   5359 __ai float32x2_t vdup_n_f32(float32_t __p0) {
   5360   float32x2_t __ret;
   5361   __ret = (float32x2_t) {__p0, __p0};
   5362   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   5363   return __ret;
   5364 }
   5365 #endif
   5366 
   5367 #ifdef __LITTLE_ENDIAN__
   5368 #define vdup_n_f16(__p0) __extension__ ({ \
   5369   float16_t __s0 = __p0; \
   5370   float16x4_t __ret; \
   5371   __ret = (float16x4_t) {__s0, __s0, __s0, __s0}; \
   5372   __ret; \
   5373 })
   5374 #else
   5375 #define vdup_n_f16(__p0) __extension__ ({ \
   5376   float16_t __s0 = __p0; \
   5377   float16x4_t __ret; \
   5378   __ret = (float16x4_t) {__s0, __s0, __s0, __s0}; \
   5379   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   5380   __ret; \
   5381 })
   5382 #endif
   5383 
   5384 #ifdef __LITTLE_ENDIAN__
   5385 __ai int32x2_t vdup_n_s32(int32_t __p0) {
   5386   int32x2_t __ret;
   5387   __ret = (int32x2_t) {__p0, __p0};
   5388   return __ret;
   5389 }
   5390 #else
   5391 __ai int32x2_t vdup_n_s32(int32_t __p0) {
   5392   int32x2_t __ret;
   5393   __ret = (int32x2_t) {__p0, __p0};
   5394   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   5395   return __ret;
   5396 }
   5397 #endif
   5398 
   5399 #ifdef __LITTLE_ENDIAN__
   5400 __ai int64x1_t vdup_n_s64(int64_t __p0) {
   5401   int64x1_t __ret;
   5402   __ret = (int64x1_t) {__p0};
   5403   return __ret;
   5404 }
   5405 #else
   5406 __ai int64x1_t vdup_n_s64(int64_t __p0) {
   5407   int64x1_t __ret;
   5408   __ret = (int64x1_t) {__p0};
   5409   return __ret;
   5410 }
   5411 #endif
   5412 
   5413 #ifdef __LITTLE_ENDIAN__
   5414 __ai int16x4_t vdup_n_s16(int16_t __p0) {
   5415   int16x4_t __ret;
   5416   __ret = (int16x4_t) {__p0, __p0, __p0, __p0};
   5417   return __ret;
   5418 }
   5419 #else
   5420 __ai int16x4_t vdup_n_s16(int16_t __p0) {
   5421   int16x4_t __ret;
   5422   __ret = (int16x4_t) {__p0, __p0, __p0, __p0};
   5423   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   5424   return __ret;
   5425 }
   5426 #endif
   5427 
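/* veor{q}_*: bitwise exclusive OR of two vectors, implemented with the C ^
 * operator on the vector types. */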
   5428 #ifdef __LITTLE_ENDIAN__
   5429 __ai uint8x16_t veorq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   5430   uint8x16_t __ret;
   5431   __ret = __p0 ^ __p1;
   5432   return __ret;
   5433 }
   5434 #else
   5435 __ai uint8x16_t veorq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   5436   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   5437   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   5438   uint8x16_t __ret;
   5439   __ret = __rev0 ^ __rev1;
   5440   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   5441   return __ret;
   5442 }
   5443 #endif
   5444 
   5445 #ifdef __LITTLE_ENDIAN__
   5446 __ai uint32x4_t veorq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   5447   uint32x4_t __ret;
   5448   __ret = __p0 ^ __p1;
   5449   return __ret;
   5450 }
   5451 #else
   5452 __ai uint32x4_t veorq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   5453   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   5454   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   5455   uint32x4_t __ret;
   5456   __ret = __rev0 ^ __rev1;
   5457   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   5458   return __ret;
   5459 }
   5460 #endif
   5461 
   5462 #ifdef __LITTLE_ENDIAN__
   5463 __ai uint64x2_t veorq_u64(uint64x2_t __p0, uint64x2_t __p1) {
   5464   uint64x2_t __ret;
   5465   __ret = __p0 ^ __p1;
   5466   return __ret;
   5467 }
   5468 #else
   5469 __ai uint64x2_t veorq_u64(uint64x2_t __p0, uint64x2_t __p1) {
   5470   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   5471   uint64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   5472   uint64x2_t __ret;
   5473   __ret = __rev0 ^ __rev1;
   5474   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   5475   return __ret;
   5476 }
   5477 #endif
   5478 
   5479 #ifdef __LITTLE_ENDIAN__
   5480 __ai uint16x8_t veorq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   5481   uint16x8_t __ret;
   5482   __ret = __p0 ^ __p1;
   5483   return __ret;
   5484 }
   5485 #else
   5486 __ai uint16x8_t veorq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   5487   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   5488   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   5489   uint16x8_t __ret;
   5490   __ret = __rev0 ^ __rev1;
   5491   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   5492   return __ret;
   5493 }
   5494 #endif
   5495 
   5496 #ifdef __LITTLE_ENDIAN__
   5497 __ai int8x16_t veorq_s8(int8x16_t __p0, int8x16_t __p1) {
   5498   int8x16_t __ret;
   5499   __ret = __p0 ^ __p1;
   5500   return __ret;
   5501 }
   5502 #else
   5503 __ai int8x16_t veorq_s8(int8x16_t __p0, int8x16_t __p1) {
   5504   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   5505   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   5506   int8x16_t __ret;
   5507   __ret = __rev0 ^ __rev1;
   5508   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   5509   return __ret;
   5510 }
   5511 #endif
   5512 
   5513 #ifdef __LITTLE_ENDIAN__
   5514 __ai int32x4_t veorq_s32(int32x4_t __p0, int32x4_t __p1) {
   5515   int32x4_t __ret;
   5516   __ret = __p0 ^ __p1;
   5517   return __ret;
   5518 }
   5519 #else
   5520 __ai int32x4_t veorq_s32(int32x4_t __p0, int32x4_t __p1) {
   5521   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   5522   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   5523   int32x4_t __ret;
   5524   __ret = __rev0 ^ __rev1;
   5525   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   5526   return __ret;
   5527 }
   5528 #endif
   5529 
   5530 #ifdef __LITTLE_ENDIAN__
   5531 __ai int64x2_t veorq_s64(int64x2_t __p0, int64x2_t __p1) {
   5532   int64x2_t __ret;
   5533   __ret = __p0 ^ __p1;
   5534   return __ret;
   5535 }
   5536 #else
   5537 __ai int64x2_t veorq_s64(int64x2_t __p0, int64x2_t __p1) {
   5538   int64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   5539   int64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   5540   int64x2_t __ret;
   5541   __ret = __rev0 ^ __rev1;
   5542   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   5543   return __ret;
   5544 }
   5545 #endif
   5546 
   5547 #ifdef __LITTLE_ENDIAN__
   5548 __ai int16x8_t veorq_s16(int16x8_t __p0, int16x8_t __p1) {
   5549   int16x8_t __ret;
   5550   __ret = __p0 ^ __p1;
   5551   return __ret;
   5552 }
   5553 #else
   5554 __ai int16x8_t veorq_s16(int16x8_t __p0, int16x8_t __p1) {
   5555   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   5556   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   5557   int16x8_t __ret;
   5558   __ret = __rev0 ^ __rev1;
   5559   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   5560   return __ret;
   5561 }
   5562 #endif
   5563 
   5564 #ifdef __LITTLE_ENDIAN__
   5565 __ai uint8x8_t veor_u8(uint8x8_t __p0, uint8x8_t __p1) {
   5566   uint8x8_t __ret;
   5567   __ret = __p0 ^ __p1;
   5568   return __ret;
   5569 }
   5570 #else
   5571 __ai uint8x8_t veor_u8(uint8x8_t __p0, uint8x8_t __p1) {
   5572   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   5573   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   5574   uint8x8_t __ret;
   5575   __ret = __rev0 ^ __rev1;
   5576   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   5577   return __ret;
   5578 }
   5579 #endif
   5580 
   5581 #ifdef __LITTLE_ENDIAN__
   5582 __ai uint32x2_t veor_u32(uint32x2_t __p0, uint32x2_t __p1) {
   5583   uint32x2_t __ret;
   5584   __ret = __p0 ^ __p1;
   5585   return __ret;
   5586 }
   5587 #else
   5588 __ai uint32x2_t veor_u32(uint32x2_t __p0, uint32x2_t __p1) {
   5589   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   5590   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   5591   uint32x2_t __ret;
   5592   __ret = __rev0 ^ __rev1;
   5593   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   5594   return __ret;
   5595 }
   5596 #endif
   5597 
   5598 #ifdef __LITTLE_ENDIAN__
   5599 __ai uint64x1_t veor_u64(uint64x1_t __p0, uint64x1_t __p1) {
   5600   uint64x1_t __ret;
   5601   __ret = __p0 ^ __p1;
   5602   return __ret;
   5603 }
   5604 #else
   5605 __ai uint64x1_t veor_u64(uint64x1_t __p0, uint64x1_t __p1) {
   5606   uint64x1_t __ret;
   5607   __ret = __p0 ^ __p1;
   5608   return __ret;
   5609 }
   5610 #endif
   5611 
   5612 #ifdef __LITTLE_ENDIAN__
   5613 __ai uint16x4_t veor_u16(uint16x4_t __p0, uint16x4_t __p1) {
   5614   uint16x4_t __ret;
   5615   __ret = __p0 ^ __p1;
   5616   return __ret;
   5617 }
   5618 #else
   5619 __ai uint16x4_t veor_u16(uint16x4_t __p0, uint16x4_t __p1) {
   5620   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   5621   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   5622   uint16x4_t __ret;
   5623   __ret = __rev0 ^ __rev1;
   5624   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   5625   return __ret;
   5626 }
   5627 #endif
   5628 
   5629 #ifdef __LITTLE_ENDIAN__
   5630 __ai int8x8_t veor_s8(int8x8_t __p0, int8x8_t __p1) {
   5631   int8x8_t __ret;
   5632   __ret = __p0 ^ __p1;
   5633   return __ret;
   5634 }
   5635 #else
   5636 __ai int8x8_t veor_s8(int8x8_t __p0, int8x8_t __p1) {
   5637   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   5638   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   5639   int8x8_t __ret;
   5640   __ret = __rev0 ^ __rev1;
   5641   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   5642   return __ret;
   5643 }
   5644 #endif
   5645 
   5646 #ifdef __LITTLE_ENDIAN__
   5647 __ai int32x2_t veor_s32(int32x2_t __p0, int32x2_t __p1) {
   5648   int32x2_t __ret;
   5649   __ret = __p0 ^ __p1;
   5650   return __ret;
   5651 }
   5652 #else
   5653 __ai int32x2_t veor_s32(int32x2_t __p0, int32x2_t __p1) {
   5654   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   5655   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   5656   int32x2_t __ret;
   5657   __ret = __rev0 ^ __rev1;
   5658   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   5659   return __ret;
   5660 }
   5661 #endif
   5662 
   5663 #ifdef __LITTLE_ENDIAN__
   5664 __ai int64x1_t veor_s64(int64x1_t __p0, int64x1_t __p1) {
   5665   int64x1_t __ret;
   5666   __ret = __p0 ^ __p1;
   5667   return __ret;
   5668 }
   5669 #else
   5670 __ai int64x1_t veor_s64(int64x1_t __p0, int64x1_t __p1) {
   5671   int64x1_t __ret;
   5672   __ret = __p0 ^ __p1;
   5673   return __ret;
   5674 }
   5675 #endif
   5676 
   5677 #ifdef __LITTLE_ENDIAN__
   5678 __ai int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) {
   5679   int16x4_t __ret;
   5680   __ret = __p0 ^ __p1;
   5681   return __ret;
   5682 }
   5683 #else
   5684 __ai int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) {
   5685   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   5686   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   5687   int16x4_t __ret;
   5688   __ret = __rev0 ^ __rev1;
   5689   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   5690   return __ret;
   5691 }
   5692 #endif
   5693 
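/* vext{q}_*: extract a result vector from the concatenation of two vectors,
 * starting at lane __p2 of the first operand.  __p2 must be a constant. */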
   5694 #ifdef __LITTLE_ENDIAN__
   5695 #define vext_p8(__p0, __p1, __p2) __extension__ ({ \
   5696   poly8x8_t __s0 = __p0; \
   5697   poly8x8_t __s1 = __p1; \
   5698   poly8x8_t __ret; \
   5699   __ret = (poly8x8_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \
   5700   __ret; \
   5701 })
   5702 #else
   5703 #define vext_p8(__p0, __p1, __p2) __extension__ ({ \
   5704   poly8x8_t __s0 = __p0; \
   5705   poly8x8_t __s1 = __p1; \
   5706   poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   5707   poly8x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   5708   poly8x8_t __ret; \
   5709   __ret = (poly8x8_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \
   5710   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   5711   __ret; \
   5712 })
   5713 #endif
   5714 
   5715 #ifdef __LITTLE_ENDIAN__
   5716 #define vext_p16(__p0, __p1, __p2) __extension__ ({ \
   5717   poly16x4_t __s0 = __p0; \
   5718   poly16x4_t __s1 = __p1; \
   5719   poly16x4_t __ret; \
   5720   __ret = (poly16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \
   5721   __ret; \
   5722 })
   5723 #else
   5724 #define vext_p16(__p0, __p1, __p2) __extension__ ({ \
   5725   poly16x4_t __s0 = __p0; \
   5726   poly16x4_t __s1 = __p1; \
   5727   poly16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   5728   poly16x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   5729   poly16x4_t __ret; \
   5730   __ret = (poly16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \
   5731   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   5732   __ret; \
   5733 })
   5734 #endif
   5735 
   5736 #ifdef __LITTLE_ENDIAN__
   5737 #define vextq_p8(__p0, __p1, __p2) __extension__ ({ \
   5738   poly8x16_t __s0 = __p0; \
   5739   poly8x16_t __s1 = __p1; \
   5740   poly8x16_t __ret; \
   5741   __ret = (poly8x16_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \
   5742   __ret; \
   5743 })
   5744 #else
   5745 #define vextq_p8(__p0, __p1, __p2) __extension__ ({ \
   5746   poly8x16_t __s0 = __p0; \
   5747   poly8x16_t __s1 = __p1; \
   5748   poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   5749   poly8x16_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   5750   poly8x16_t __ret; \
   5751   __ret = (poly8x16_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \
   5752   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   5753   __ret; \
   5754 })
   5755 #endif
   5756 
   5757 #ifdef __LITTLE_ENDIAN__
   5758 #define vextq_p16(__p0, __p1, __p2) __extension__ ({ \
   5759   poly16x8_t __s0 = __p0; \
   5760   poly16x8_t __s1 = __p1; \
   5761   poly16x8_t __ret; \
   5762   __ret = (poly16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \
   5763   __ret; \
   5764 })
   5765 #else
   5766 #define vextq_p16(__p0, __p1, __p2) __extension__ ({ \
   5767   poly16x8_t __s0 = __p0; \
   5768   poly16x8_t __s1 = __p1; \
   5769   poly16x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   5770   poly16x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   5771   poly16x8_t __ret; \
   5772   __ret = (poly16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \
   5773   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   5774   __ret; \
   5775 })
   5776 #endif
   5777 
   5778 #ifdef __LITTLE_ENDIAN__
   5779 #define vextq_u8(__p0, __p1, __p2) __extension__ ({ \
   5780   uint8x16_t __s0 = __p0; \
   5781   uint8x16_t __s1 = __p1; \
   5782   uint8x16_t __ret; \
   5783   __ret = (uint8x16_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \
   5784   __ret; \
   5785 })
   5786 #else
   5787 #define vextq_u8(__p0, __p1, __p2) __extension__ ({ \
   5788   uint8x16_t __s0 = __p0; \
   5789   uint8x16_t __s1 = __p1; \
   5790   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   5791   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   5792   uint8x16_t __ret; \
   5793   __ret = (uint8x16_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \
   5794   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   5795   __ret; \
   5796 })
   5797 #endif
   5798 
   5799 #ifdef __LITTLE_ENDIAN__
   5800 #define vextq_u32(__p0, __p1, __p2) __extension__ ({ \
   5801   uint32x4_t __s0 = __p0; \
   5802   uint32x4_t __s1 = __p1; \
   5803   uint32x4_t __ret; \
   5804   __ret = (uint32x4_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \
   5805   __ret; \
   5806 })
   5807 #else
   5808 #define vextq_u32(__p0, __p1, __p2) __extension__ ({ \
   5809   uint32x4_t __s0 = __p0; \
   5810   uint32x4_t __s1 = __p1; \
   5811   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   5812   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   5813   uint32x4_t __ret; \
   5814   __ret = (uint32x4_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \
   5815   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   5816   __ret; \
   5817 })
   5818 #endif
   5819 
   5820 #ifdef __LITTLE_ENDIAN__
   5821 #define vextq_u64(__p0, __p1, __p2) __extension__ ({ \
   5822   uint64x2_t __s0 = __p0; \
   5823   uint64x2_t __s1 = __p1; \
   5824   uint64x2_t __ret; \
   5825   __ret = (uint64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \
   5826   __ret; \
   5827 })
   5828 #else
   5829 #define vextq_u64(__p0, __p1, __p2) __extension__ ({ \
   5830   uint64x2_t __s0 = __p0; \
   5831   uint64x2_t __s1 = __p1; \
   5832   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   5833   uint64x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   5834   uint64x2_t __ret; \
   5835   __ret = (uint64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \
   5836   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   5837   __ret; \
   5838 })
   5839 #endif
   5840 
   5841 #ifdef __LITTLE_ENDIAN__
   5842 #define vextq_u16(__p0, __p1, __p2) __extension__ ({ \
   5843   uint16x8_t __s0 = __p0; \
   5844   uint16x8_t __s1 = __p1; \
   5845   uint16x8_t __ret; \
   5846   __ret = (uint16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \
   5847   __ret; \
   5848 })
   5849 #else
   5850 #define vextq_u16(__p0, __p1, __p2) __extension__ ({ \
   5851   uint16x8_t __s0 = __p0; \
   5852   uint16x8_t __s1 = __p1; \
   5853   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   5854   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   5855   uint16x8_t __ret; \
   5856   __ret = (uint16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \
   5857   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   5858   __ret; \
   5859 })
   5860 #endif
   5861 
   5862 #ifdef __LITTLE_ENDIAN__
   5863 #define vextq_s8(__p0, __p1, __p2) __extension__ ({ \
   5864   int8x16_t __s0 = __p0; \
   5865   int8x16_t __s1 = __p1; \
   5866   int8x16_t __ret; \
   5867   __ret = (int8x16_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \
   5868   __ret; \
   5869 })
   5870 #else
   5871 #define vextq_s8(__p0, __p1, __p2) __extension__ ({ \
   5872   int8x16_t __s0 = __p0; \
   5873   int8x16_t __s1 = __p1; \
   5874   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   5875   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   5876   int8x16_t __ret; \
   5877   __ret = (int8x16_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \
   5878   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   5879   __ret; \
   5880 })
   5881 #endif
   5882 
   5883 #ifdef __LITTLE_ENDIAN__
   5884 #define vextq_f32(__p0, __p1, __p2) __extension__ ({ \
   5885   float32x4_t __s0 = __p0; \
   5886   float32x4_t __s1 = __p1; \
   5887   float32x4_t __ret; \
   5888   __ret = (float32x4_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 41); \
   5889   __ret; \
   5890 })
   5891 #else
   5892 #define vextq_f32(__p0, __p1, __p2) __extension__ ({ \
   5893   float32x4_t __s0 = __p0; \
   5894   float32x4_t __s1 = __p1; \
   5895   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   5896   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   5897   float32x4_t __ret; \
   5898   __ret = (float32x4_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 41); \
   5899   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   5900   __ret; \
   5901 })
   5902 #endif
   5903 
   5904 #ifdef __LITTLE_ENDIAN__
   5905 #define vextq_s32(__p0, __p1, __p2) __extension__ ({ \
   5906   int32x4_t __s0 = __p0; \
   5907   int32x4_t __s1 = __p1; \
   5908   int32x4_t __ret; \
   5909   __ret = (int32x4_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \
   5910   __ret; \
   5911 })
   5912 #else
   5913 #define vextq_s32(__p0, __p1, __p2) __extension__ ({ \
   5914   int32x4_t __s0 = __p0; \
   5915   int32x4_t __s1 = __p1; \
   5916   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   5917   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   5918   int32x4_t __ret; \
   5919   __ret = (int32x4_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \
   5920   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   5921   __ret; \
   5922 })
   5923 #endif
   5924 
   5925 #ifdef __LITTLE_ENDIAN__
   5926 #define vextq_s64(__p0, __p1, __p2) __extension__ ({ \
   5927   int64x2_t __s0 = __p0; \
   5928   int64x2_t __s1 = __p1; \
   5929   int64x2_t __ret; \
   5930   __ret = (int64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \
   5931   __ret; \
   5932 })
   5933 #else
   5934 #define vextq_s64(__p0, __p1, __p2) __extension__ ({ \
   5935   int64x2_t __s0 = __p0; \
   5936   int64x2_t __s1 = __p1; \
   5937   int64x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   5938   int64x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   5939   int64x2_t __ret; \
   5940   __ret = (int64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \
   5941   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   5942   __ret; \
   5943 })
   5944 #endif
   5945 
   5946 #ifdef __LITTLE_ENDIAN__
   5947 #define vextq_s16(__p0, __p1, __p2) __extension__ ({ \
   5948   int16x8_t __s0 = __p0; \
   5949   int16x8_t __s1 = __p1; \
   5950   int16x8_t __ret; \
   5951   __ret = (int16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \
   5952   __ret; \
   5953 })
   5954 #else
   5955 #define vextq_s16(__p0, __p1, __p2) __extension__ ({ \
   5956   int16x8_t __s0 = __p0; \
   5957   int16x8_t __s1 = __p1; \
   5958   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   5959   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   5960   int16x8_t __ret; \
   5961   __ret = (int16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \
   5962   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   5963   __ret; \
   5964 })
   5965 #endif
   5966 
   5967 #ifdef __LITTLE_ENDIAN__
   5968 #define vext_u8(__p0, __p1, __p2) __extension__ ({ \
   5969   uint8x8_t __s0 = __p0; \
   5970   uint8x8_t __s1 = __p1; \
   5971   uint8x8_t __ret; \
   5972   __ret = (uint8x8_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \
   5973   __ret; \
   5974 })
   5975 #else
   5976 #define vext_u8(__p0, __p1, __p2) __extension__ ({ \
   5977   uint8x8_t __s0 = __p0; \
   5978   uint8x8_t __s1 = __p1; \
   5979   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   5980   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   5981   uint8x8_t __ret; \
   5982   __ret = (uint8x8_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \
   5983   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   5984   __ret; \
   5985 })
   5986 #endif
   5987 
   5988 #ifdef __LITTLE_ENDIAN__
   5989 #define vext_u32(__p0, __p1, __p2) __extension__ ({ \
   5990   uint32x2_t __s0 = __p0; \
   5991   uint32x2_t __s1 = __p1; \
   5992   uint32x2_t __ret; \
   5993   __ret = (uint32x2_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \
   5994   __ret; \
   5995 })
   5996 #else
   5997 #define vext_u32(__p0, __p1, __p2) __extension__ ({ \
   5998   uint32x2_t __s0 = __p0; \
   5999   uint32x2_t __s1 = __p1; \
   6000   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   6001   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   6002   uint32x2_t __ret; \
   6003   __ret = (uint32x2_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \
   6004   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   6005   __ret; \
   6006 })
   6007 #endif
   6008 
   6009 #ifdef __LITTLE_ENDIAN__
   6010 #define vext_u64(__p0, __p1, __p2) __extension__ ({ \
   6011   uint64x1_t __s0 = __p0; \
   6012   uint64x1_t __s1 = __p1; \
   6013   uint64x1_t __ret; \
   6014   __ret = (uint64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \
   6015   __ret; \
   6016 })
   6017 #else
   6018 #define vext_u64(__p0, __p1, __p2) __extension__ ({ \
   6019   uint64x1_t __s0 = __p0; \
   6020   uint64x1_t __s1 = __p1; \
   6021   uint64x1_t __ret; \
   6022   __ret = (uint64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \
   6023   __ret; \
   6024 })
   6025 #endif
   6026 
   6027 #ifdef __LITTLE_ENDIAN__
   6028 #define vext_u16(__p0, __p1, __p2) __extension__ ({ \
   6029   uint16x4_t __s0 = __p0; \
   6030   uint16x4_t __s1 = __p1; \
   6031   uint16x4_t __ret; \
   6032   __ret = (uint16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \
   6033   __ret; \
   6034 })
   6035 #else
   6036 #define vext_u16(__p0, __p1, __p2) __extension__ ({ \
   6037   uint16x4_t __s0 = __p0; \
   6038   uint16x4_t __s1 = __p1; \
   6039   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   6040   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   6041   uint16x4_t __ret; \
   6042   __ret = (uint16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \
   6043   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   6044   __ret; \
   6045 })
   6046 #endif
   6047 
   6048 #ifdef __LITTLE_ENDIAN__
   6049 #define vext_s8(__p0, __p1, __p2) __extension__ ({ \
   6050   int8x8_t __s0 = __p0; \
   6051   int8x8_t __s1 = __p1; \
   6052   int8x8_t __ret; \
   6053   __ret = (int8x8_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \
   6054   __ret; \
   6055 })
   6056 #else
   6057 #define vext_s8(__p0, __p1, __p2) __extension__ ({ \
   6058   int8x8_t __s0 = __p0; \
   6059   int8x8_t __s1 = __p1; \
   6060   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   6061   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   6062   int8x8_t __ret; \
   6063   __ret = (int8x8_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \
   6064   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   6065   __ret; \
   6066 })
   6067 #endif
   6068 
   6069 #ifdef __LITTLE_ENDIAN__
   6070 #define vext_f32(__p0, __p1, __p2) __extension__ ({ \
   6071   float32x2_t __s0 = __p0; \
   6072   float32x2_t __s1 = __p1; \
   6073   float32x2_t __ret; \
   6074   __ret = (float32x2_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 9); \
   6075   __ret; \
   6076 })
   6077 #else
   6078 #define vext_f32(__p0, __p1, __p2) __extension__ ({ \
   6079   float32x2_t __s0 = __p0; \
   6080   float32x2_t __s1 = __p1; \
   6081   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   6082   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   6083   float32x2_t __ret; \
   6084   __ret = (float32x2_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 9); \
   6085   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   6086   __ret; \
   6087 })
   6088 #endif
   6089 
   6090 #ifdef __LITTLE_ENDIAN__
   6091 #define vext_s32(__p0, __p1, __p2) __extension__ ({ \
   6092   int32x2_t __s0 = __p0; \
   6093   int32x2_t __s1 = __p1; \
   6094   int32x2_t __ret; \
   6095   __ret = (int32x2_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \
   6096   __ret; \
   6097 })
   6098 #else
   6099 #define vext_s32(__p0, __p1, __p2) __extension__ ({ \
   6100   int32x2_t __s0 = __p0; \
   6101   int32x2_t __s1 = __p1; \
   6102   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   6103   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   6104   int32x2_t __ret; \
   6105   __ret = (int32x2_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \
   6106   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   6107   __ret; \
   6108 })
   6109 #endif
   6110 
   6111 #ifdef __LITTLE_ENDIAN__
   6112 #define vext_s64(__p0, __p1, __p2) __extension__ ({ \
   6113   int64x1_t __s0 = __p0; \
   6114   int64x1_t __s1 = __p1; \
   6115   int64x1_t __ret; \
   6116   __ret = (int64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \
   6117   __ret; \
   6118 })
   6119 #else
   6120 #define vext_s64(__p0, __p1, __p2) __extension__ ({ \
   6121   int64x1_t __s0 = __p0; \
   6122   int64x1_t __s1 = __p1; \
   6123   int64x1_t __ret; \
   6124   __ret = (int64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \
   6125   __ret; \
   6126 })
   6127 #endif
   6128 
   6129 #ifdef __LITTLE_ENDIAN__
   6130 #define vext_s16(__p0, __p1, __p2) __extension__ ({ \
   6131   int16x4_t __s0 = __p0; \
   6132   int16x4_t __s1 = __p1; \
   6133   int16x4_t __ret; \
   6134   __ret = (int16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \
   6135   __ret; \
   6136 })
   6137 #else
   6138 #define vext_s16(__p0, __p1, __p2) __extension__ ({ \
   6139   int16x4_t __s0 = __p0; \
   6140   int16x4_t __s1 = __p1; \
   6141   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   6142   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   6143   int16x4_t __ret; \
   6144   __ret = (int16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \
   6145   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   6146   __ret; \
   6147 })
   6148 #endif
   6149 
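/* vfma/vfmaq: per-lane fused multiply-add, returning __p0 + __p1 * __p2 with a
 * single rounding step. */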
   6150 #ifdef __LITTLE_ENDIAN__
   6151 __ai float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
   6152   float32x4_t __ret;
   6153   __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41);
   6154   return __ret;
   6155 }
   6156 #else
   6157 __ai float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
   6158   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   6159   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   6160   float32x4_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
   6161   float32x4_t __ret;
   6162   __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41);
   6163   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   6164   return __ret;
   6165 }
   6166 __ai float32x4_t __noswap_vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) {
   6167   float32x4_t __ret;
   6168   __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41);
   6169   return __ret;
   6170 }
   6171 #endif
   6172 
   6173 #ifdef __LITTLE_ENDIAN__
   6174 __ai float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
   6175   float32x2_t __ret;
   6176   __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9);
   6177   return __ret;
   6178 }
   6179 #else
   6180 __ai float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
   6181   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   6182   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   6183   float32x2_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
   6184   float32x2_t __ret;
   6185   __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9);
   6186   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   6187   return __ret;
   6188 }
   6189 __ai float32x2_t __noswap_vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) {
   6190   float32x2_t __ret;
   6191   __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9);
   6192   return __ret;
   6193 }
   6194 #endif
   6195 
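/* vget_high: return the upper (higher-numbered) half of a 128-bit vector as a
 * 64-bit vector. */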
   6196 #ifdef __LITTLE_ENDIAN__
   6197 __ai poly8x8_t vget_high_p8(poly8x16_t __p0) {
   6198   poly8x8_t __ret;
   6199   __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15);
   6200   return __ret;
   6201 }
   6202 #else
   6203 __ai poly8x8_t vget_high_p8(poly8x16_t __p0) {
   6204   poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   6205   poly8x8_t __ret;
   6206   __ret = __builtin_shufflevector(__rev0, __rev0, 8, 9, 10, 11, 12, 13, 14, 15);
   6207   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   6208   return __ret;
   6209 }
   6210 __ai poly8x8_t __noswap_vget_high_p8(poly8x16_t __p0) {
   6211   poly8x8_t __ret;
   6212   __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15);
   6213   return __ret;
   6214 }
   6215 #endif
   6216 
   6217 #ifdef __LITTLE_ENDIAN__
   6218 __ai poly16x4_t vget_high_p16(poly16x8_t __p0) {
   6219   poly16x4_t __ret;
   6220   __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7);
   6221   return __ret;
   6222 }
   6223 #else
   6224 __ai poly16x4_t vget_high_p16(poly16x8_t __p0) {
   6225   poly16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   6226   poly16x4_t __ret;
   6227   __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7);
   6228   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   6229   return __ret;
   6230 }
   6231 #endif
   6232 
   6233 #ifdef __LITTLE_ENDIAN__
   6234 __ai uint8x8_t vget_high_u8(uint8x16_t __p0) {
   6235   uint8x8_t __ret;
   6236   __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15);
   6237   return __ret;
   6238 }
   6239 #else
   6240 __ai uint8x8_t vget_high_u8(uint8x16_t __p0) {
   6241   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   6242   uint8x8_t __ret;
   6243   __ret = __builtin_shufflevector(__rev0, __rev0, 8, 9, 10, 11, 12, 13, 14, 15);
   6244   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   6245   return __ret;
   6246 }
   6247 __ai uint8x8_t __noswap_vget_high_u8(uint8x16_t __p0) {
   6248   uint8x8_t __ret;
   6249   __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15);
   6250   return __ret;
   6251 }
   6252 #endif
   6253 
   6254 #ifdef __LITTLE_ENDIAN__
   6255 __ai uint32x2_t vget_high_u32(uint32x4_t __p0) {
   6256   uint32x2_t __ret;
   6257   __ret = __builtin_shufflevector(__p0, __p0, 2, 3);
   6258   return __ret;
   6259 }
   6260 #else
   6261 __ai uint32x2_t vget_high_u32(uint32x4_t __p0) {
   6262   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   6263   uint32x2_t __ret;
   6264   __ret = __builtin_shufflevector(__rev0, __rev0, 2, 3);
   6265   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   6266   return __ret;
   6267 }
   6268 __ai uint32x2_t __noswap_vget_high_u32(uint32x4_t __p0) {
   6269   uint32x2_t __ret;
   6270   __ret = __builtin_shufflevector(__p0, __p0, 2, 3);
   6271   return __ret;
   6272 }
   6273 #endif
   6274 
   6275 #ifdef __LITTLE_ENDIAN__
   6276 __ai uint64x1_t vget_high_u64(uint64x2_t __p0) {
   6277   uint64x1_t __ret;
   6278   __ret = __builtin_shufflevector(__p0, __p0, 1);
   6279   return __ret;
   6280 }
   6281 #else
   6282 __ai uint64x1_t vget_high_u64(uint64x2_t __p0) {
   6283   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   6284   uint64x1_t __ret;
   6285   __ret = __builtin_shufflevector(__rev0, __rev0, 1);
   6286   return __ret;
   6287 }
   6288 #endif
   6289 
   6290 #ifdef __LITTLE_ENDIAN__
   6291 __ai uint16x4_t vget_high_u16(uint16x8_t __p0) {
   6292   uint16x4_t __ret;
   6293   __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7);
   6294   return __ret;
   6295 }
   6296 #else
   6297 __ai uint16x4_t vget_high_u16(uint16x8_t __p0) {
   6298   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   6299   uint16x4_t __ret;
   6300   __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7);
   6301   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   6302   return __ret;
   6303 }
   6304 __ai uint16x4_t __noswap_vget_high_u16(uint16x8_t __p0) {
   6305   uint16x4_t __ret;
   6306   __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7);
   6307   return __ret;
   6308 }
   6309 #endif
   6310 
   6311 #ifdef __LITTLE_ENDIAN__
   6312 __ai int8x8_t vget_high_s8(int8x16_t __p0) {
   6313   int8x8_t __ret;
   6314   __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15);
   6315   return __ret;
   6316 }
   6317 #else
   6318 __ai int8x8_t vget_high_s8(int8x16_t __p0) {
   6319   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   6320   int8x8_t __ret;
   6321   __ret = __builtin_shufflevector(__rev0, __rev0, 8, 9, 10, 11, 12, 13, 14, 15);
   6322   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   6323   return __ret;
   6324 }
   6325 __ai int8x8_t __noswap_vget_high_s8(int8x16_t __p0) {
   6326   int8x8_t __ret;
   6327   __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15);
   6328   return __ret;
   6329 }
   6330 #endif
   6331 
   6332 #ifdef __LITTLE_ENDIAN__
   6333 __ai float32x2_t vget_high_f32(float32x4_t __p0) {
   6334   float32x2_t __ret;
   6335   __ret = __builtin_shufflevector(__p0, __p0, 2, 3);
   6336   return __ret;
   6337 }
   6338 #else
   6339 __ai float32x2_t vget_high_f32(float32x4_t __p0) {
   6340   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   6341   float32x2_t __ret;
   6342   __ret = __builtin_shufflevector(__rev0, __rev0, 2, 3);
   6343   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   6344   return __ret;
   6345 }
   6346 __ai float32x2_t __noswap_vget_high_f32(float32x4_t __p0) {
   6347   float32x2_t __ret;
   6348   __ret = __builtin_shufflevector(__p0, __p0, 2, 3);
   6349   return __ret;
   6350 }
   6351 #endif
   6352 
   6353 #ifdef __LITTLE_ENDIAN__
   6354 __ai float16x4_t vget_high_f16(float16x8_t __p0) {
   6355   float16x4_t __ret;
   6356   __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7);
   6357   return __ret;
   6358 }
   6359 #else
   6360 __ai float16x4_t vget_high_f16(float16x8_t __p0) {
   6361   float16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   6362   float16x4_t __ret;
   6363   __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7);
   6364   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   6365   return __ret;
   6366 }
   6367 __ai float16x4_t __noswap_vget_high_f16(float16x8_t __p0) {
   6368   float16x4_t __ret;
   6369   __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7);
   6370   return __ret;
   6371 }
   6372 #endif
   6373 
   6374 #ifdef __LITTLE_ENDIAN__
   6375 __ai int32x2_t vget_high_s32(int32x4_t __p0) {
   6376   int32x2_t __ret;
   6377   __ret = __builtin_shufflevector(__p0, __p0, 2, 3);
   6378   return __ret;
   6379 }
   6380 #else
   6381 __ai int32x2_t vget_high_s32(int32x4_t __p0) {
   6382   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   6383   int32x2_t __ret;
   6384   __ret = __builtin_shufflevector(__rev0, __rev0, 2, 3);
   6385   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   6386   return __ret;
   6387 }
   6388 __ai int32x2_t __noswap_vget_high_s32(int32x4_t __p0) {
   6389   int32x2_t __ret;
   6390   __ret = __builtin_shufflevector(__p0, __p0, 2, 3);
   6391   return __ret;
   6392 }
   6393 #endif
   6394 
   6395 #ifdef __LITTLE_ENDIAN__
   6396 __ai int64x1_t vget_high_s64(int64x2_t __p0) {
   6397   int64x1_t __ret;
   6398   __ret = __builtin_shufflevector(__p0, __p0, 1);
   6399   return __ret;
   6400 }
   6401 #else
   6402 __ai int64x1_t vget_high_s64(int64x2_t __p0) {
   6403   int64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   6404   int64x1_t __ret;
   6405   __ret = __builtin_shufflevector(__rev0, __rev0, 1);
   6406   return __ret;
   6407 }
   6408 #endif
   6409 
   6410 #ifdef __LITTLE_ENDIAN__
   6411 __ai int16x4_t vget_high_s16(int16x8_t __p0) {
   6412   int16x4_t __ret;
   6413   __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7);
   6414   return __ret;
   6415 }
   6416 #else
   6417 __ai int16x4_t vget_high_s16(int16x8_t __p0) {
   6418   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   6419   int16x4_t __ret;
   6420   __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7);
   6421   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   6422   return __ret;
   6423 }
   6424 __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
   6425   int16x4_t __ret;
   6426   __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7);
   6427   return __ret;
   6428 }
   6429 #endif
   6430 
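/* vget_lane/vgetq_lane: extract the scalar value held in lane __p1 of a 64-bit
 * or 128-bit vector. */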
   6431 #ifdef __LITTLE_ENDIAN__
   6432 #define vget_lane_p8(__p0, __p1) __extension__ ({ \
   6433   poly8x8_t __s0 = __p0; \
   6434   poly8_t __ret; \
   6435   __ret = (poly8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \
   6436   __ret; \
   6437 })
   6438 #else
   6439 #define vget_lane_p8(__p0, __p1) __extension__ ({ \
   6440   poly8x8_t __s0 = __p0; \
   6441   poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   6442   poly8_t __ret; \
   6443   __ret = (poly8_t) __builtin_neon_vget_lane_i8((int8x8_t)__rev0, __p1); \
   6444   __ret; \
   6445 })
   6446 #define __noswap_vget_lane_p8(__p0, __p1) __extension__ ({ \
   6447   poly8x8_t __s0 = __p0; \
   6448   poly8_t __ret; \
   6449   __ret = (poly8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \
   6450   __ret; \
   6451 })
   6452 #endif
   6453 
   6454 #ifdef __LITTLE_ENDIAN__
   6455 #define vget_lane_p16(__p0, __p1) __extension__ ({ \
   6456   poly16x4_t __s0 = __p0; \
   6457   poly16_t __ret; \
   6458   __ret = (poly16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \
   6459   __ret; \
   6460 })
   6461 #else
   6462 #define vget_lane_p16(__p0, __p1) __extension__ ({ \
   6463   poly16x4_t __s0 = __p0; \
   6464   poly16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   6465   poly16_t __ret; \
   6466   __ret = (poly16_t) __builtin_neon_vget_lane_i16((int8x8_t)__rev0, __p1); \
   6467   __ret; \
   6468 })
   6469 #define __noswap_vget_lane_p16(__p0, __p1) __extension__ ({ \
   6470   poly16x4_t __s0 = __p0; \
   6471   poly16_t __ret; \
   6472   __ret = (poly16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \
   6473   __ret; \
   6474 })
   6475 #endif
   6476 
   6477 #ifdef __LITTLE_ENDIAN__
   6478 #define vgetq_lane_p8(__p0, __p1) __extension__ ({ \
   6479   poly8x16_t __s0 = __p0; \
   6480   poly8_t __ret; \
   6481   __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \
   6482   __ret; \
   6483 })
   6484 #else
   6485 #define vgetq_lane_p8(__p0, __p1) __extension__ ({ \
   6486   poly8x16_t __s0 = __p0; \
   6487   poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   6488   poly8_t __ret; \
   6489   __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__rev0, __p1); \
   6490   __ret; \
   6491 })
   6492 #define __noswap_vgetq_lane_p8(__p0, __p1) __extension__ ({ \
   6493   poly8x16_t __s0 = __p0; \
   6494   poly8_t __ret; \
   6495   __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \
   6496   __ret; \
   6497 })
   6498 #endif
   6499 
   6500 #ifdef __LITTLE_ENDIAN__
   6501 #define vgetq_lane_p16(__p0, __p1) __extension__ ({ \
   6502   poly16x8_t __s0 = __p0; \
   6503   poly16_t __ret; \
   6504   __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \
   6505   __ret; \
   6506 })
   6507 #else
   6508 #define vgetq_lane_p16(__p0, __p1) __extension__ ({ \
   6509   poly16x8_t __s0 = __p0; \
   6510   poly16x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   6511   poly16_t __ret; \
   6512   __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__rev0, __p1); \
   6513   __ret; \
   6514 })
   6515 #define __noswap_vgetq_lane_p16(__p0, __p1) __extension__ ({ \
   6516   poly16x8_t __s0 = __p0; \
   6517   poly16_t __ret; \
   6518   __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \
   6519   __ret; \
   6520 })
   6521 #endif
   6522 
   6523 #ifdef __LITTLE_ENDIAN__
   6524 #define vgetq_lane_u8(__p0, __p1) __extension__ ({ \
   6525   uint8x16_t __s0 = __p0; \
   6526   uint8_t __ret; \
   6527   __ret = (uint8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \
   6528   __ret; \
   6529 })
   6530 #else
   6531 #define vgetq_lane_u8(__p0, __p1) __extension__ ({ \
   6532   uint8x16_t __s0 = __p0; \
   6533   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   6534   uint8_t __ret; \
   6535   __ret = (uint8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__rev0, __p1); \
   6536   __ret; \
   6537 })
   6538 #define __noswap_vgetq_lane_u8(__p0, __p1) __extension__ ({ \
   6539   uint8x16_t __s0 = __p0; \
   6540   uint8_t __ret; \
   6541   __ret = (uint8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \
   6542   __ret; \
   6543 })
   6544 #endif
   6545 
   6546 #ifdef __LITTLE_ENDIAN__
   6547 #define vgetq_lane_u32(__p0, __p1) __extension__ ({ \
   6548   uint32x4_t __s0 = __p0; \
   6549   uint32_t __ret; \
   6550   __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1); \
   6551   __ret; \
   6552 })
   6553 #else
   6554 #define vgetq_lane_u32(__p0, __p1) __extension__ ({ \
   6555   uint32x4_t __s0 = __p0; \
   6556   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   6557   uint32_t __ret; \
   6558   __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__rev0, __p1); \
   6559   __ret; \
   6560 })
   6561 #define __noswap_vgetq_lane_u32(__p0, __p1) __extension__ ({ \
   6562   uint32x4_t __s0 = __p0; \
   6563   uint32_t __ret; \
   6564   __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1); \
   6565   __ret; \
   6566 })
   6567 #endif
   6568 
   6569 #ifdef __LITTLE_ENDIAN__
   6570 #define vgetq_lane_u64(__p0, __p1) __extension__ ({ \
   6571   uint64x2_t __s0 = __p0; \
   6572   uint64_t __ret; \
   6573   __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \
   6574   __ret; \
   6575 })
   6576 #else
   6577 #define vgetq_lane_u64(__p0, __p1) __extension__ ({ \
   6578   uint64x2_t __s0 = __p0; \
   6579   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   6580   uint64_t __ret; \
   6581   __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__rev0, __p1); \
   6582   __ret; \
   6583 })
   6584 #define __noswap_vgetq_lane_u64(__p0, __p1) __extension__ ({ \
   6585   uint64x2_t __s0 = __p0; \
   6586   uint64_t __ret; \
   6587   __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \
   6588   __ret; \
   6589 })
   6590 #endif
   6591 
   6592 #ifdef __LITTLE_ENDIAN__
   6593 #define vgetq_lane_u16(__p0, __p1) __extension__ ({ \
   6594   uint16x8_t __s0 = __p0; \
   6595   uint16_t __ret; \
   6596   __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \
   6597   __ret; \
   6598 })
   6599 #else
   6600 #define vgetq_lane_u16(__p0, __p1) __extension__ ({ \
   6601   uint16x8_t __s0 = __p0; \
   6602   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   6603   uint16_t __ret; \
   6604   __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__rev0, __p1); \
   6605   __ret; \
   6606 })
   6607 #define __noswap_vgetq_lane_u16(__p0, __p1) __extension__ ({ \
   6608   uint16x8_t __s0 = __p0; \
   6609   uint16_t __ret; \
   6610   __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \
   6611   __ret; \
   6612 })
   6613 #endif
   6614 
   6615 #ifdef __LITTLE_ENDIAN__
   6616 #define vgetq_lane_s8(__p0, __p1) __extension__ ({ \
   6617   int8x16_t __s0 = __p0; \
   6618   int8_t __ret; \
   6619   __ret = (int8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \
   6620   __ret; \
   6621 })
   6622 #else
   6623 #define vgetq_lane_s8(__p0, __p1) __extension__ ({ \
   6624   int8x16_t __s0 = __p0; \
   6625   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   6626   int8_t __ret; \
   6627   __ret = (int8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__rev0, __p1); \
   6628   __ret; \
   6629 })
   6630 #define __noswap_vgetq_lane_s8(__p0, __p1) __extension__ ({ \
   6631   int8x16_t __s0 = __p0; \
   6632   int8_t __ret; \
   6633   __ret = (int8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \
   6634   __ret; \
   6635 })
   6636 #endif
   6637 
   6638 #ifdef __LITTLE_ENDIAN__
   6639 #define vgetq_lane_f32(__p0, __p1) __extension__ ({ \
   6640   float32x4_t __s0 = __p0; \
   6641   float32_t __ret; \
   6642   __ret = (float32_t) __builtin_neon_vgetq_lane_f32((int8x16_t)__s0, __p1); \
   6643   __ret; \
   6644 })
   6645 #else
   6646 #define vgetq_lane_f32(__p0, __p1) __extension__ ({ \
   6647   float32x4_t __s0 = __p0; \
   6648   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   6649   float32_t __ret; \
   6650   __ret = (float32_t) __builtin_neon_vgetq_lane_f32((int8x16_t)__rev0, __p1); \
   6651   __ret; \
   6652 })
   6653 #define __noswap_vgetq_lane_f32(__p0, __p1) __extension__ ({ \
   6654   float32x4_t __s0 = __p0; \
   6655   float32_t __ret; \
   6656   __ret = (float32_t) __builtin_neon_vgetq_lane_f32((int8x16_t)__s0, __p1); \
   6657   __ret; \
   6658 })
   6659 #endif
   6660 
   6661 #ifdef __LITTLE_ENDIAN__
   6662 #define vgetq_lane_s32(__p0, __p1) __extension__ ({ \
   6663   int32x4_t __s0 = __p0; \
   6664   int32_t __ret; \
   6665   __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1); \
   6666   __ret; \
   6667 })
   6668 #else
   6669 #define vgetq_lane_s32(__p0, __p1) __extension__ ({ \
   6670   int32x4_t __s0 = __p0; \
   6671   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   6672   int32_t __ret; \
   6673   __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__rev0, __p1); \
   6674   __ret; \
   6675 })
   6676 #define __noswap_vgetq_lane_s32(__p0, __p1) __extension__ ({ \
   6677   int32x4_t __s0 = __p0; \
   6678   int32_t __ret; \
   6679   __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1); \
   6680   __ret; \
   6681 })
   6682 #endif
   6683 
   6684 #ifdef __LITTLE_ENDIAN__
   6685 #define vgetq_lane_s64(__p0, __p1) __extension__ ({ \
   6686   int64x2_t __s0 = __p0; \
   6687   int64_t __ret; \
   6688   __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \
   6689   __ret; \
   6690 })
   6691 #else
   6692 #define vgetq_lane_s64(__p0, __p1) __extension__ ({ \
   6693   int64x2_t __s0 = __p0; \
   6694   int64x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   6695   int64_t __ret; \
   6696   __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__rev0, __p1); \
   6697   __ret; \
   6698 })
   6699 #define __noswap_vgetq_lane_s64(__p0, __p1) __extension__ ({ \
   6700   int64x2_t __s0 = __p0; \
   6701   int64_t __ret; \
   6702   __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \
   6703   __ret; \
   6704 })
   6705 #endif
   6706 
   6707 #ifdef __LITTLE_ENDIAN__
   6708 #define vgetq_lane_s16(__p0, __p1) __extension__ ({ \
   6709   int16x8_t __s0 = __p0; \
   6710   int16_t __ret; \
   6711   __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \
   6712   __ret; \
   6713 })
   6714 #else
   6715 #define vgetq_lane_s16(__p0, __p1) __extension__ ({ \
   6716   int16x8_t __s0 = __p0; \
   6717   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   6718   int16_t __ret; \
   6719   __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__rev0, __p1); \
   6720   __ret; \
   6721 })
   6722 #define __noswap_vgetq_lane_s16(__p0, __p1) __extension__ ({ \
   6723   int16x8_t __s0 = __p0; \
   6724   int16_t __ret; \
   6725   __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \
   6726   __ret; \
   6727 })
   6728 #endif
   6729 
   6730 #ifdef __LITTLE_ENDIAN__
   6731 #define vget_lane_u8(__p0, __p1) __extension__ ({ \
   6732   uint8x8_t __s0 = __p0; \
   6733   uint8_t __ret; \
   6734   __ret = (uint8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \
   6735   __ret; \
   6736 })
   6737 #else
   6738 #define vget_lane_u8(__p0, __p1) __extension__ ({ \
   6739   uint8x8_t __s0 = __p0; \
   6740   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   6741   uint8_t __ret; \
   6742   __ret = (uint8_t) __builtin_neon_vget_lane_i8((int8x8_t)__rev0, __p1); \
   6743   __ret; \
   6744 })
   6745 #define __noswap_vget_lane_u8(__p0, __p1) __extension__ ({ \
   6746   uint8x8_t __s0 = __p0; \
   6747   uint8_t __ret; \
   6748   __ret = (uint8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \
   6749   __ret; \
   6750 })
   6751 #endif
   6752 
   6753 #ifdef __LITTLE_ENDIAN__
   6754 #define vget_lane_u32(__p0, __p1) __extension__ ({ \
   6755   uint32x2_t __s0 = __p0; \
   6756   uint32_t __ret; \
   6757   __ret = (uint32_t) __builtin_neon_vget_lane_i32((int8x8_t)__s0, __p1); \
   6758   __ret; \
   6759 })
   6760 #else
   6761 #define vget_lane_u32(__p0, __p1) __extension__ ({ \
   6762   uint32x2_t __s0 = __p0; \
   6763   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   6764   uint32_t __ret; \
   6765   __ret = (uint32_t) __builtin_neon_vget_lane_i32((int8x8_t)__rev0, __p1); \
   6766   __ret; \
   6767 })
   6768 #define __noswap_vget_lane_u32(__p0, __p1) __extension__ ({ \
   6769   uint32x2_t __s0 = __p0; \
   6770   uint32_t __ret; \
   6771   __ret = (uint32_t) __builtin_neon_vget_lane_i32((int8x8_t)__s0, __p1); \
   6772   __ret; \
   6773 })
   6774 #endif
   6775 
   6776 #ifdef __LITTLE_ENDIAN__
   6777 #define vget_lane_u64(__p0, __p1) __extension__ ({ \
   6778   uint64x1_t __s0 = __p0; \
   6779   uint64_t __ret; \
   6780   __ret = (uint64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \
   6781   __ret; \
   6782 })
   6783 #else
   6784 #define vget_lane_u64(__p0, __p1) __extension__ ({ \
   6785   uint64x1_t __s0 = __p0; \
   6786   uint64_t __ret; \
   6787   __ret = (uint64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \
   6788   __ret; \
   6789 })
   6790 #define __noswap_vget_lane_u64(__p0, __p1) __extension__ ({ \
   6791   uint64x1_t __s0 = __p0; \
   6792   uint64_t __ret; \
   6793   __ret = (uint64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \
   6794   __ret; \
   6795 })
   6796 #endif
   6797 
   6798 #ifdef __LITTLE_ENDIAN__
   6799 #define vget_lane_u16(__p0, __p1) __extension__ ({ \
   6800   uint16x4_t __s0 = __p0; \
   6801   uint16_t __ret; \
   6802   __ret = (uint16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \
   6803   __ret; \
   6804 })
   6805 #else
   6806 #define vget_lane_u16(__p0, __p1) __extension__ ({ \
   6807   uint16x4_t __s0 = __p0; \
   6808   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   6809   uint16_t __ret; \
   6810   __ret = (uint16_t) __builtin_neon_vget_lane_i16((int8x8_t)__rev0, __p1); \
   6811   __ret; \
   6812 })
   6813 #define __noswap_vget_lane_u16(__p0, __p1) __extension__ ({ \
   6814   uint16x4_t __s0 = __p0; \
   6815   uint16_t __ret; \
   6816   __ret = (uint16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \
   6817   __ret; \
   6818 })
   6819 #endif
   6820 
   6821 #ifdef __LITTLE_ENDIAN__
   6822 #define vget_lane_s8(__p0, __p1) __extension__ ({ \
   6823   int8x8_t __s0 = __p0; \
   6824   int8_t __ret; \
   6825   __ret = (int8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \
   6826   __ret; \
   6827 })
   6828 #else
   6829 #define vget_lane_s8(__p0, __p1) __extension__ ({ \
   6830   int8x8_t __s0 = __p0; \
   6831   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   6832   int8_t __ret; \
   6833   __ret = (int8_t) __builtin_neon_vget_lane_i8((int8x8_t)__rev0, __p1); \
   6834   __ret; \
   6835 })
   6836 #define __noswap_vget_lane_s8(__p0, __p1) __extension__ ({ \
   6837   int8x8_t __s0 = __p0; \
   6838   int8_t __ret; \
   6839   __ret = (int8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \
   6840   __ret; \
   6841 })
   6842 #endif
   6843 
   6844 #ifdef __LITTLE_ENDIAN__
   6845 #define vget_lane_f32(__p0, __p1) __extension__ ({ \
   6846   float32x2_t __s0 = __p0; \
   6847   float32_t __ret; \
   6848   __ret = (float32_t) __builtin_neon_vget_lane_f32((int8x8_t)__s0, __p1); \
   6849   __ret; \
   6850 })
   6851 #else
   6852 #define vget_lane_f32(__p0, __p1) __extension__ ({ \
   6853   float32x2_t __s0 = __p0; \
   6854   float32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   6855   float32_t __ret; \
   6856   __ret = (float32_t) __builtin_neon_vget_lane_f32((int8x8_t)__rev0, __p1); \
   6857   __ret; \
   6858 })
   6859 #define __noswap_vget_lane_f32(__p0, __p1) __extension__ ({ \
   6860   float32x2_t __s0 = __p0; \
   6861   float32_t __ret; \
   6862   __ret = (float32_t) __builtin_neon_vget_lane_f32((int8x8_t)__s0, __p1); \
   6863   __ret; \
   6864 })
   6865 #endif
   6866 
   6867 #ifdef __LITTLE_ENDIAN__
   6868 #define vget_lane_s32(__p0, __p1) __extension__ ({ \
   6869   int32x2_t __s0 = __p0; \
   6870   int32_t __ret; \
   6871   __ret = (int32_t) __builtin_neon_vget_lane_i32((int8x8_t)__s0, __p1); \
   6872   __ret; \
   6873 })
   6874 #else
   6875 #define vget_lane_s32(__p0, __p1) __extension__ ({ \
   6876   int32x2_t __s0 = __p0; \
   6877   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   6878   int32_t __ret; \
   6879   __ret = (int32_t) __builtin_neon_vget_lane_i32((int8x8_t)__rev0, __p1); \
   6880   __ret; \
   6881 })
   6882 #define __noswap_vget_lane_s32(__p0, __p1) __extension__ ({ \
   6883   int32x2_t __s0 = __p0; \
   6884   int32_t __ret; \
   6885   __ret = (int32_t) __builtin_neon_vget_lane_i32((int8x8_t)__s0, __p1); \
   6886   __ret; \
   6887 })
   6888 #endif
   6889 
   6890 #ifdef __LITTLE_ENDIAN__
   6891 #define vget_lane_s64(__p0, __p1) __extension__ ({ \
   6892   int64x1_t __s0 = __p0; \
   6893   int64_t __ret; \
   6894   __ret = (int64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \
   6895   __ret; \
   6896 })
   6897 #else
   6898 #define vget_lane_s64(__p0, __p1) __extension__ ({ \
   6899   int64x1_t __s0 = __p0; \
   6900   int64_t __ret; \
   6901   __ret = (int64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \
   6902   __ret; \
   6903 })
   6904 #define __noswap_vget_lane_s64(__p0, __p1) __extension__ ({ \
   6905   int64x1_t __s0 = __p0; \
   6906   int64_t __ret; \
   6907   __ret = (int64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \
   6908   __ret; \
   6909 })
   6910 #endif
   6911 
   6912 #ifdef __LITTLE_ENDIAN__
   6913 #define vget_lane_s16(__p0, __p1) __extension__ ({ \
   6914   int16x4_t __s0 = __p0; \
   6915   int16_t __ret; \
   6916   __ret = (int16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \
   6917   __ret; \
   6918 })
   6919 #else
   6920 #define vget_lane_s16(__p0, __p1) __extension__ ({ \
   6921   int16x4_t __s0 = __p0; \
   6922   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   6923   int16_t __ret; \
   6924   __ret = (int16_t) __builtin_neon_vget_lane_i16((int8x8_t)__rev0, __p1); \
   6925   __ret; \
   6926 })
   6927 #define __noswap_vget_lane_s16(__p0, __p1) __extension__ ({ \
   6928   int16x4_t __s0 = __p0; \
   6929   int16_t __ret; \
   6930   __ret = (int16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \
   6931   __ret; \
   6932 })
   6933 #endif
   6934 
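/* vget_low: return the lower (lower-numbered) half of a 128-bit vector as a
 * 64-bit vector. */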
   6935 #ifdef __LITTLE_ENDIAN__
   6936 __ai poly8x8_t vget_low_p8(poly8x16_t __p0) {
   6937   poly8x8_t __ret;
   6938   __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3, 4, 5, 6, 7);
   6939   return __ret;
   6940 }
   6941 #else
   6942 __ai poly8x8_t vget_low_p8(poly8x16_t __p0) {
   6943   poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   6944   poly8x8_t __ret;
   6945   __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3, 4, 5, 6, 7);
   6946   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   6947   return __ret;
   6948 }
   6949 #endif
   6950 
   6951 #ifdef __LITTLE_ENDIAN__
   6952 __ai poly16x4_t vget_low_p16(poly16x8_t __p0) {
   6953   poly16x4_t __ret;
   6954   __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3);
   6955   return __ret;
   6956 }
   6957 #else
   6958 __ai poly16x4_t vget_low_p16(poly16x8_t __p0) {
   6959   poly16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   6960   poly16x4_t __ret;
   6961   __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3);
   6962   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   6963   return __ret;
   6964 }
   6965 #endif
   6966 
   6967 #ifdef __LITTLE_ENDIAN__
   6968 __ai uint8x8_t vget_low_u8(uint8x16_t __p0) {
   6969   uint8x8_t __ret;
   6970   __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3, 4, 5, 6, 7);
   6971   return __ret;
   6972 }
   6973 #else
   6974 __ai uint8x8_t vget_low_u8(uint8x16_t __p0) {
   6975   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   6976   uint8x8_t __ret;
   6977   __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3, 4, 5, 6, 7);
   6978   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   6979   return __ret;
   6980 }
   6981 #endif
   6982 
   6983 #ifdef __LITTLE_ENDIAN__
   6984 __ai uint32x2_t vget_low_u32(uint32x4_t __p0) {
   6985   uint32x2_t __ret;
   6986   __ret = __builtin_shufflevector(__p0, __p0, 0, 1);
   6987   return __ret;
   6988 }
   6989 #else
   6990 __ai uint32x2_t vget_low_u32(uint32x4_t __p0) {
   6991   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   6992   uint32x2_t __ret;
   6993   __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1);
   6994   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   6995   return __ret;
   6996 }
   6997 #endif
   6998 
   6999 #ifdef __LITTLE_ENDIAN__
   7000 __ai uint64x1_t vget_low_u64(uint64x2_t __p0) {
   7001   uint64x1_t __ret;
   7002   __ret = __builtin_shufflevector(__p0, __p0, 0);
   7003   return __ret;
   7004 }
   7005 #else
   7006 __ai uint64x1_t vget_low_u64(uint64x2_t __p0) {
   7007   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   7008   uint64x1_t __ret;
   7009   __ret = __builtin_shufflevector(__rev0, __rev0, 0);
   7010   return __ret;
   7011 }
   7012 #endif
   7013 
   7014 #ifdef __LITTLE_ENDIAN__
   7015 __ai uint16x4_t vget_low_u16(uint16x8_t __p0) {
   7016   uint16x4_t __ret;
   7017   __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3);
   7018   return __ret;
   7019 }
   7020 #else
   7021 __ai uint16x4_t vget_low_u16(uint16x8_t __p0) {
   7022   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7023   uint16x4_t __ret;
   7024   __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3);
   7025   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7026   return __ret;
   7027 }
   7028 #endif
   7029 
   7030 #ifdef __LITTLE_ENDIAN__
   7031 __ai int8x8_t vget_low_s8(int8x16_t __p0) {
   7032   int8x8_t __ret;
   7033   __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3, 4, 5, 6, 7);
   7034   return __ret;
   7035 }
   7036 #else
   7037 __ai int8x8_t vget_low_s8(int8x16_t __p0) {
   7038   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7039   int8x8_t __ret;
   7040   __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3, 4, 5, 6, 7);
   7041   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   7042   return __ret;
   7043 }
   7044 #endif
   7045 
   7046 #ifdef __LITTLE_ENDIAN__
   7047 __ai float32x2_t vget_low_f32(float32x4_t __p0) {
   7048   float32x2_t __ret;
   7049   __ret = __builtin_shufflevector(__p0, __p0, 0, 1);
   7050   return __ret;
   7051 }
   7052 #else
   7053 __ai float32x2_t vget_low_f32(float32x4_t __p0) {
   7054   float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   7055   float32x2_t __ret;
   7056   __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1);
   7057   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   7058   return __ret;
   7059 }
   7060 #endif
   7061 
   7062 #ifdef __LITTLE_ENDIAN__
   7063 __ai float16x4_t vget_low_f16(float16x8_t __p0) {
   7064   float16x4_t __ret;
   7065   __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3);
   7066   return __ret;
   7067 }
   7068 #else
   7069 __ai float16x4_t vget_low_f16(float16x8_t __p0) {
   7070   float16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7071   float16x4_t __ret;
   7072   __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3);
   7073   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7074   return __ret;
   7075 }
   7076 #endif
   7077 
   7078 #ifdef __LITTLE_ENDIAN__
   7079 __ai int32x2_t vget_low_s32(int32x4_t __p0) {
   7080   int32x2_t __ret;
   7081   __ret = __builtin_shufflevector(__p0, __p0, 0, 1);
   7082   return __ret;
   7083 }
   7084 #else
   7085 __ai int32x2_t vget_low_s32(int32x4_t __p0) {
   7086   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   7087   int32x2_t __ret;
   7088   __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1);
   7089   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   7090   return __ret;
   7091 }
   7092 #endif
   7093 
   7094 #ifdef __LITTLE_ENDIAN__
   7095 __ai int64x1_t vget_low_s64(int64x2_t __p0) {
   7096   int64x1_t __ret;
   7097   __ret = __builtin_shufflevector(__p0, __p0, 0);
   7098   return __ret;
   7099 }
   7100 #else
   7101 __ai int64x1_t vget_low_s64(int64x2_t __p0) {
   7102   int64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   7103   int64x1_t __ret;
   7104   __ret = __builtin_shufflevector(__rev0, __rev0, 0);
   7105   return __ret;
   7106 }
   7107 #endif
   7108 
   7109 #ifdef __LITTLE_ENDIAN__
   7110 __ai int16x4_t vget_low_s16(int16x8_t __p0) {
   7111   int16x4_t __ret;
   7112   __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3);
   7113   return __ret;
   7114 }
   7115 #else
   7116 __ai int16x4_t vget_low_s16(int16x8_t __p0) {
   7117   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7118   int16x4_t __ret;
   7119   __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3);
   7120   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7121   return __ret;
   7122 }
   7123 #endif
   7124 
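/* vhaddq/vhadd: per-lane halving add, (__p0[i] + __p1[i]) >> 1, computed without
 * intermediate overflow. */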
   7125 #ifdef __LITTLE_ENDIAN__
   7126 __ai uint8x16_t vhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   7127   uint8x16_t __ret;
   7128   __ret = (uint8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
   7129   return __ret;
   7130 }
   7131 #else
   7132 __ai uint8x16_t vhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   7133   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7134   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7135   uint8x16_t __ret;
   7136   __ret = (uint8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
   7137   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7138   return __ret;
   7139 }
   7140 #endif
   7141 
   7142 #ifdef __LITTLE_ENDIAN__
   7143 __ai uint32x4_t vhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   7144   uint32x4_t __ret;
   7145   __ret = (uint32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
   7146   return __ret;
   7147 }
   7148 #else
   7149 __ai uint32x4_t vhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   7150   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   7151   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   7152   uint32x4_t __ret;
   7153   __ret = (uint32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
   7154   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7155   return __ret;
   7156 }
   7157 #endif
   7158 
   7159 #ifdef __LITTLE_ENDIAN__
   7160 __ai uint16x8_t vhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   7161   uint16x8_t __ret;
   7162   __ret = (uint16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
   7163   return __ret;
   7164 }
   7165 #else
   7166 __ai uint16x8_t vhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   7167   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7168   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   7169   uint16x8_t __ret;
   7170   __ret = (uint16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
   7171   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   7172   return __ret;
   7173 }
   7174 #endif
   7175 
   7176 #ifdef __LITTLE_ENDIAN__
   7177 __ai int8x16_t vhaddq_s8(int8x16_t __p0, int8x16_t __p1) {
   7178   int8x16_t __ret;
   7179   __ret = (int8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
   7180   return __ret;
   7181 }
   7182 #else
   7183 __ai int8x16_t vhaddq_s8(int8x16_t __p0, int8x16_t __p1) {
   7184   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7185   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7186   int8x16_t __ret;
   7187   __ret = (int8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32);
   7188   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7189   return __ret;
   7190 }
   7191 #endif
   7192 
   7193 #ifdef __LITTLE_ENDIAN__
   7194 __ai int32x4_t vhaddq_s32(int32x4_t __p0, int32x4_t __p1) {
   7195   int32x4_t __ret;
   7196   __ret = (int32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
   7197   return __ret;
   7198 }
   7199 #else
   7200 __ai int32x4_t vhaddq_s32(int32x4_t __p0, int32x4_t __p1) {
   7201   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   7202   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   7203   int32x4_t __ret;
   7204   __ret = (int32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34);
   7205   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7206   return __ret;
   7207 }
   7208 #endif
   7209 
   7210 #ifdef __LITTLE_ENDIAN__
   7211 __ai int16x8_t vhaddq_s16(int16x8_t __p0, int16x8_t __p1) {
   7212   int16x8_t __ret;
   7213   __ret = (int16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
   7214   return __ret;
   7215 }
   7216 #else
   7217 __ai int16x8_t vhaddq_s16(int16x8_t __p0, int16x8_t __p1) {
   7218   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7219   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   7220   int16x8_t __ret;
   7221   __ret = (int16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
   7222   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   7223   return __ret;
   7224 }
   7225 #endif
   7226 
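/* vhadd_*: 64-bit (doubleword) forms of the halving add above. */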
   7227 #ifdef __LITTLE_ENDIAN__
   7228 __ai uint8x8_t vhadd_u8(uint8x8_t __p0, uint8x8_t __p1) {
   7229   uint8x8_t __ret;
   7230   __ret = (uint8x8_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16);
   7231   return __ret;
   7232 }
   7233 #else
   7234 __ai uint8x8_t vhadd_u8(uint8x8_t __p0, uint8x8_t __p1) {
   7235   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7236   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   7237   uint8x8_t __ret;
   7238   __ret = (uint8x8_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16);
   7239   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   7240   return __ret;
   7241 }
   7242 #endif
   7243 
   7244 #ifdef __LITTLE_ENDIAN__
   7245 __ai uint32x2_t vhadd_u32(uint32x2_t __p0, uint32x2_t __p1) {
   7246   uint32x2_t __ret;
   7247   __ret = (uint32x2_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
   7248   return __ret;
   7249 }
   7250 #else
   7251 __ai uint32x2_t vhadd_u32(uint32x2_t __p0, uint32x2_t __p1) {
   7252   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   7253   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   7254   uint32x2_t __ret;
   7255   __ret = (uint32x2_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
   7256   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   7257   return __ret;
   7258 }
   7259 #endif
   7260 
   7261 #ifdef __LITTLE_ENDIAN__
   7262 __ai uint16x4_t vhadd_u16(uint16x4_t __p0, uint16x4_t __p1) {
   7263   uint16x4_t __ret;
   7264   __ret = (uint16x4_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
   7265   return __ret;
   7266 }
   7267 #else
   7268 __ai uint16x4_t vhadd_u16(uint16x4_t __p0, uint16x4_t __p1) {
   7269   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   7270   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   7271   uint16x4_t __ret;
   7272   __ret = (uint16x4_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
   7273   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7274   return __ret;
   7275 }
   7276 #endif
   7277 
   7278 #ifdef __LITTLE_ENDIAN__
   7279 __ai int8x8_t vhadd_s8(int8x8_t __p0, int8x8_t __p1) {
   7280   int8x8_t __ret;
   7281   __ret = (int8x8_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0);
   7282   return __ret;
   7283 }
   7284 #else
   7285 __ai int8x8_t vhadd_s8(int8x8_t __p0, int8x8_t __p1) {
   7286   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7287   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   7288   int8x8_t __ret;
   7289   __ret = (int8x8_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0);
   7290   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   7291   return __ret;
   7292 }
   7293 #endif
   7294 
   7295 #ifdef __LITTLE_ENDIAN__
   7296 __ai int32x2_t vhadd_s32(int32x2_t __p0, int32x2_t __p1) {
   7297   int32x2_t __ret;
   7298   __ret = (int32x2_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2);
   7299   return __ret;
   7300 }
   7301 #else
   7302 __ai int32x2_t vhadd_s32(int32x2_t __p0, int32x2_t __p1) {
   7303   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   7304   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   7305   int32x2_t __ret;
   7306   __ret = (int32x2_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2);
   7307   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   7308   return __ret;
   7309 }
   7310 #endif
   7311 
   7312 #ifdef __LITTLE_ENDIAN__
   7313 __ai int16x4_t vhadd_s16(int16x4_t __p0, int16x4_t __p1) {
   7314   int16x4_t __ret;
   7315   __ret = (int16x4_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1);
   7316   return __ret;
   7317 }
   7318 #else
   7319 __ai int16x4_t vhadd_s16(int16x4_t __p0, int16x4_t __p1) {
   7320   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   7321   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   7322   int16x4_t __ret;
   7323   __ret = (int16x4_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1);
   7324   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7325   return __ret;
   7326 }
   7327 #endif
   7328 
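/* vhsubq_*: vector halving subtract on 128-bit vectors. Each result lane is
 * (__p0[i] - __p1[i]) >> 1, with no intermediate overflow. */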
   7329 #ifdef __LITTLE_ENDIAN__
   7330 __ai uint8x16_t vhsubq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   7331   uint8x16_t __ret;
   7332   __ret = (uint8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 48);
   7333   return __ret;
   7334 }
   7335 #else
   7336 __ai uint8x16_t vhsubq_u8(uint8x16_t __p0, uint8x16_t __p1) {
   7337   uint8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7338   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7339   uint8x16_t __ret;
   7340   __ret = (uint8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48);
   7341   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7342   return __ret;
   7343 }
   7344 #endif
   7345 
   7346 #ifdef __LITTLE_ENDIAN__
   7347 __ai uint32x4_t vhsubq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   7348   uint32x4_t __ret;
   7349   __ret = (uint32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
   7350   return __ret;
   7351 }
   7352 #else
   7353 __ai uint32x4_t vhsubq_u32(uint32x4_t __p0, uint32x4_t __p1) {
   7354   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   7355   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   7356   uint32x4_t __ret;
   7357   __ret = (uint32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
   7358   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7359   return __ret;
   7360 }
   7361 #endif
   7362 
   7363 #ifdef __LITTLE_ENDIAN__
   7364 __ai uint16x8_t vhsubq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   7365   uint16x8_t __ret;
   7366   __ret = (uint16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 49);
   7367   return __ret;
   7368 }
   7369 #else
   7370 __ai uint16x8_t vhsubq_u16(uint16x8_t __p0, uint16x8_t __p1) {
   7371   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7372   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   7373   uint16x8_t __ret;
   7374   __ret = (uint16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
   7375   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   7376   return __ret;
   7377 }
   7378 #endif
   7379 
   7380 #ifdef __LITTLE_ENDIAN__
   7381 __ai int8x16_t vhsubq_s8(int8x16_t __p0, int8x16_t __p1) {
   7382   int8x16_t __ret;
   7383   __ret = (int8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 32);
   7384   return __ret;
   7385 }
   7386 #else
   7387 __ai int8x16_t vhsubq_s8(int8x16_t __p0, int8x16_t __p1) {
   7388   int8x16_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7389   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7390   int8x16_t __ret;
   7391   __ret = (int8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32);
   7392   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   7393   return __ret;
   7394 }
   7395 #endif
   7396 
   7397 #ifdef __LITTLE_ENDIAN__
   7398 __ai int32x4_t vhsubq_s32(int32x4_t __p0, int32x4_t __p1) {
   7399   int32x4_t __ret;
   7400   __ret = (int32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34);
   7401   return __ret;
   7402 }
   7403 #else
   7404 __ai int32x4_t vhsubq_s32(int32x4_t __p0, int32x4_t __p1) {
   7405   int32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   7406   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   7407   int32x4_t __ret;
   7408   __ret = (int32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34);
   7409   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7410   return __ret;
   7411 }
   7412 #endif
   7413 
   7414 #ifdef __LITTLE_ENDIAN__
   7415 __ai int16x8_t vhsubq_s16(int16x8_t __p0, int16x8_t __p1) {
   7416   int16x8_t __ret;
   7417   __ret = (int16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 33);
   7418   return __ret;
   7419 }
   7420 #else
   7421 __ai int16x8_t vhsubq_s16(int16x8_t __p0, int16x8_t __p1) {
   7422   int16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7423   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   7424   int16x8_t __ret;
   7425   __ret = (int16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33);
   7426   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   7427   return __ret;
   7428 }
   7429 #endif
   7430 
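/* vhsub_*: 64-bit (doubleword) forms of the halving subtract above. */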
   7431 #ifdef __LITTLE_ENDIAN__
   7432 __ai uint8x8_t vhsub_u8(uint8x8_t __p0, uint8x8_t __p1) {
   7433   uint8x8_t __ret;
   7434   __ret = (uint8x8_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 16);
   7435   return __ret;
   7436 }
   7437 #else
   7438 __ai uint8x8_t vhsub_u8(uint8x8_t __p0, uint8x8_t __p1) {
   7439   uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7440   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   7441   uint8x8_t __ret;
   7442   __ret = (uint8x8_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16);
   7443   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   7444   return __ret;
   7445 }
   7446 #endif
   7447 
   7448 #ifdef __LITTLE_ENDIAN__
   7449 __ai uint32x2_t vhsub_u32(uint32x2_t __p0, uint32x2_t __p1) {
   7450   uint32x2_t __ret;
   7451   __ret = (uint32x2_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
   7452   return __ret;
   7453 }
   7454 #else
   7455 __ai uint32x2_t vhsub_u32(uint32x2_t __p0, uint32x2_t __p1) {
   7456   uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   7457   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   7458   uint32x2_t __ret;
   7459   __ret = (uint32x2_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
   7460   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   7461   return __ret;
   7462 }
   7463 #endif
   7464 
   7465 #ifdef __LITTLE_ENDIAN__
   7466 __ai uint16x4_t vhsub_u16(uint16x4_t __p0, uint16x4_t __p1) {
   7467   uint16x4_t __ret;
   7468   __ret = (uint16x4_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
   7469   return __ret;
   7470 }
   7471 #else
   7472 __ai uint16x4_t vhsub_u16(uint16x4_t __p0, uint16x4_t __p1) {
   7473   uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   7474   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   7475   uint16x4_t __ret;
   7476   __ret = (uint16x4_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
   7477   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7478   return __ret;
   7479 }
   7480 #endif
   7481 
   7482 #ifdef __LITTLE_ENDIAN__
   7483 __ai int8x8_t vhsub_s8(int8x8_t __p0, int8x8_t __p1) {
   7484   int8x8_t __ret;
   7485   __ret = (int8x8_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 0);
   7486   return __ret;
   7487 }
   7488 #else
   7489 __ai int8x8_t vhsub_s8(int8x8_t __p0, int8x8_t __p1) {
   7490   int8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   7491   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
   7492   int8x8_t __ret;
   7493   __ret = (int8x8_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0);
   7494   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
   7495   return __ret;
   7496 }
   7497 #endif
   7498 
   7499 #ifdef __LITTLE_ENDIAN__
   7500 __ai int32x2_t vhsub_s32(int32x2_t __p0, int32x2_t __p1) {
   7501   int32x2_t __ret;
   7502   __ret = (int32x2_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2);
   7503   return __ret;
   7504 }
   7505 #else
   7506 __ai int32x2_t vhsub_s32(int32x2_t __p0, int32x2_t __p1) {
   7507   int32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   7508   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
   7509   int32x2_t __ret;
   7510   __ret = (int32x2_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2);
   7511   __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   7512   return __ret;
   7513 }
   7514 #endif
   7515 
   7516 #ifdef __LITTLE_ENDIAN__
   7517 __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) {
   7518   int16x4_t __ret;
   7519   __ret = (int16x4_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1);
   7520   return __ret;
   7521 }
   7522 #else
   7523 __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) {
   7524   int16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   7525   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
   7526   int16x4_t __ret;
   7527   __ret = (int16x4_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1);
   7528   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
   7529   return __ret;
   7530 }
   7531 #endif
   7532 
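/* vld1_* / vld1q_*: load a full 64-bit or 128-bit vector from the pointer
 * __p0. On big-endian targets the loaded value is lane-reversed to match the
 * ordering used by the rest of the header; one-element vectors (u64/s64)
 * need no reversal, so both variants of those macros are identical. */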
   7533 #ifdef __LITTLE_ENDIAN__
   7534 #define vld1_p8(__p0) __extension__ ({ \
   7535   poly8x8_t __ret; \
   7536   __ret = (poly8x8_t) __builtin_neon_vld1_v(__p0, 4); \
   7537   __ret; \
   7538 })
   7539 #else
   7540 #define vld1_p8(__p0) __extension__ ({ \
   7541   poly8x8_t __ret; \
   7542   __ret = (poly8x8_t) __builtin_neon_vld1_v(__p0, 4); \
   7543   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   7544   __ret; \
   7545 })
   7546 #endif
   7547 
   7548 #ifdef __LITTLE_ENDIAN__
   7549 #define vld1_p16(__p0) __extension__ ({ \
   7550   poly16x4_t __ret; \
   7551   __ret = (poly16x4_t) __builtin_neon_vld1_v(__p0, 5); \
   7552   __ret; \
   7553 })
   7554 #else
   7555 #define vld1_p16(__p0) __extension__ ({ \
   7556   poly16x4_t __ret; \
   7557   __ret = (poly16x4_t) __builtin_neon_vld1_v(__p0, 5); \
   7558   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   7559   __ret; \
   7560 })
   7561 #endif
   7562 
   7563 #ifdef __LITTLE_ENDIAN__
   7564 #define vld1q_p8(__p0) __extension__ ({ \
   7565   poly8x16_t __ret; \
   7566   __ret = (poly8x16_t) __builtin_neon_vld1q_v(__p0, 36); \
   7567   __ret; \
   7568 })
   7569 #else
   7570 #define vld1q_p8(__p0) __extension__ ({ \
   7571   poly8x16_t __ret; \
   7572   __ret = (poly8x16_t) __builtin_neon_vld1q_v(__p0, 36); \
   7573   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   7574   __ret; \
   7575 })
   7576 #endif
   7577 
   7578 #ifdef __LITTLE_ENDIAN__
   7579 #define vld1q_p16(__p0) __extension__ ({ \
   7580   poly16x8_t __ret; \
   7581   __ret = (poly16x8_t) __builtin_neon_vld1q_v(__p0, 37); \
   7582   __ret; \
   7583 })
   7584 #else
   7585 #define vld1q_p16(__p0) __extension__ ({ \
   7586   poly16x8_t __ret; \
   7587   __ret = (poly16x8_t) __builtin_neon_vld1q_v(__p0, 37); \
   7588   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   7589   __ret; \
   7590 })
   7591 #endif
   7592 
   7593 #ifdef __LITTLE_ENDIAN__
   7594 #define vld1q_u8(__p0) __extension__ ({ \
   7595   uint8x16_t __ret; \
   7596   __ret = (uint8x16_t) __builtin_neon_vld1q_v(__p0, 48); \
   7597   __ret; \
   7598 })
   7599 #else
   7600 #define vld1q_u8(__p0) __extension__ ({ \
   7601   uint8x16_t __ret; \
   7602   __ret = (uint8x16_t) __builtin_neon_vld1q_v(__p0, 48); \
   7603   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   7604   __ret; \
   7605 })
   7606 #endif
   7607 
   7608 #ifdef __LITTLE_ENDIAN__
   7609 #define vld1q_u32(__p0) __extension__ ({ \
   7610   uint32x4_t __ret; \
   7611   __ret = (uint32x4_t) __builtin_neon_vld1q_v(__p0, 50); \
   7612   __ret; \
   7613 })
   7614 #else
   7615 #define vld1q_u32(__p0) __extension__ ({ \
   7616   uint32x4_t __ret; \
   7617   __ret = (uint32x4_t) __builtin_neon_vld1q_v(__p0, 50); \
   7618   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   7619   __ret; \
   7620 })
   7621 #endif
   7622 
   7623 #ifdef __LITTLE_ENDIAN__
   7624 #define vld1q_u64(__p0) __extension__ ({ \
   7625   uint64x2_t __ret; \
   7626   __ret = (uint64x2_t) __builtin_neon_vld1q_v(__p0, 51); \
   7627   __ret; \
   7628 })
   7629 #else
   7630 #define vld1q_u64(__p0) __extension__ ({ \
   7631   uint64x2_t __ret; \
   7632   __ret = (uint64x2_t) __builtin_neon_vld1q_v(__p0, 51); \
   7633   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   7634   __ret; \
   7635 })
   7636 #endif
   7637 
   7638 #ifdef __LITTLE_ENDIAN__
   7639 #define vld1q_u16(__p0) __extension__ ({ \
   7640   uint16x8_t __ret; \
   7641   __ret = (uint16x8_t) __builtin_neon_vld1q_v(__p0, 49); \
   7642   __ret; \
   7643 })
   7644 #else
   7645 #define vld1q_u16(__p0) __extension__ ({ \
   7646   uint16x8_t __ret; \
   7647   __ret = (uint16x8_t) __builtin_neon_vld1q_v(__p0, 49); \
   7648   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   7649   __ret; \
   7650 })
   7651 #endif
   7652 
   7653 #ifdef __LITTLE_ENDIAN__
   7654 #define vld1q_s8(__p0) __extension__ ({ \
   7655   int8x16_t __ret; \
   7656   __ret = (int8x16_t) __builtin_neon_vld1q_v(__p0, 32); \
   7657   __ret; \
   7658 })
   7659 #else
   7660 #define vld1q_s8(__p0) __extension__ ({ \
   7661   int8x16_t __ret; \
   7662   __ret = (int8x16_t) __builtin_neon_vld1q_v(__p0, 32); \
   7663   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   7664   __ret; \
   7665 })
   7666 #endif
   7667 
   7668 #ifdef __LITTLE_ENDIAN__
   7669 #define vld1q_f32(__p0) __extension__ ({ \
   7670   float32x4_t __ret; \
   7671   __ret = (float32x4_t) __builtin_neon_vld1q_v(__p0, 41); \
   7672   __ret; \
   7673 })
   7674 #else
   7675 #define vld1q_f32(__p0) __extension__ ({ \
   7676   float32x4_t __ret; \
   7677   __ret = (float32x4_t) __builtin_neon_vld1q_v(__p0, 41); \
   7678   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   7679   __ret; \
   7680 })
   7681 #endif
   7682 
   7683 #ifdef __LITTLE_ENDIAN__
   7684 #define vld1q_f16(__p0) __extension__ ({ \
   7685   float16x8_t __ret; \
   7686   __ret = (float16x8_t) __builtin_neon_vld1q_v(__p0, 40); \
   7687   __ret; \
   7688 })
   7689 #else
   7690 #define vld1q_f16(__p0) __extension__ ({ \
   7691   float16x8_t __ret; \
   7692   __ret = (float16x8_t) __builtin_neon_vld1q_v(__p0, 40); \
   7693   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   7694   __ret; \
   7695 })
   7696 #endif
   7697 
   7698 #ifdef __LITTLE_ENDIAN__
   7699 #define vld1q_s32(__p0) __extension__ ({ \
   7700   int32x4_t __ret; \
   7701   __ret = (int32x4_t) __builtin_neon_vld1q_v(__p0, 34); \
   7702   __ret; \
   7703 })
   7704 #else
   7705 #define vld1q_s32(__p0) __extension__ ({ \
   7706   int32x4_t __ret; \
   7707   __ret = (int32x4_t) __builtin_neon_vld1q_v(__p0, 34); \
   7708   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   7709   __ret; \
   7710 })
   7711 #endif
   7712 
   7713 #ifdef __LITTLE_ENDIAN__
   7714 #define vld1q_s64(__p0) __extension__ ({ \
   7715   int64x2_t __ret; \
   7716   __ret = (int64x2_t) __builtin_neon_vld1q_v(__p0, 35); \
   7717   __ret; \
   7718 })
   7719 #else
   7720 #define vld1q_s64(__p0) __extension__ ({ \
   7721   int64x2_t __ret; \
   7722   __ret = (int64x2_t) __builtin_neon_vld1q_v(__p0, 35); \
   7723   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   7724   __ret; \
   7725 })
   7726 #endif
   7727 
   7728 #ifdef __LITTLE_ENDIAN__
   7729 #define vld1q_s16(__p0) __extension__ ({ \
   7730   int16x8_t __ret; \
   7731   __ret = (int16x8_t) __builtin_neon_vld1q_v(__p0, 33); \
   7732   __ret; \
   7733 })
   7734 #else
   7735 #define vld1q_s16(__p0) __extension__ ({ \
   7736   int16x8_t __ret; \
   7737   __ret = (int16x8_t) __builtin_neon_vld1q_v(__p0, 33); \
   7738   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   7739   __ret; \
   7740 })
   7741 #endif
   7742 
   7743 #ifdef __LITTLE_ENDIAN__
   7744 #define vld1_u8(__p0) __extension__ ({ \
   7745   uint8x8_t __ret; \
   7746   __ret = (uint8x8_t) __builtin_neon_vld1_v(__p0, 16); \
   7747   __ret; \
   7748 })
   7749 #else
   7750 #define vld1_u8(__p0) __extension__ ({ \
   7751   uint8x8_t __ret; \
   7752   __ret = (uint8x8_t) __builtin_neon_vld1_v(__p0, 16); \
   7753   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   7754   __ret; \
   7755 })
   7756 #endif
   7757 
   7758 #ifdef __LITTLE_ENDIAN__
   7759 #define vld1_u32(__p0) __extension__ ({ \
   7760   uint32x2_t __ret; \
   7761   __ret = (uint32x2_t) __builtin_neon_vld1_v(__p0, 18); \
   7762   __ret; \
   7763 })
   7764 #else
   7765 #define vld1_u32(__p0) __extension__ ({ \
   7766   uint32x2_t __ret; \
   7767   __ret = (uint32x2_t) __builtin_neon_vld1_v(__p0, 18); \
   7768   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   7769   __ret; \
   7770 })
   7771 #endif
   7772 
   7773 #ifdef __LITTLE_ENDIAN__
   7774 #define vld1_u64(__p0) __extension__ ({ \
   7775   uint64x1_t __ret; \
   7776   __ret = (uint64x1_t) __builtin_neon_vld1_v(__p0, 19); \
   7777   __ret; \
   7778 })
   7779 #else
   7780 #define vld1_u64(__p0) __extension__ ({ \
   7781   uint64x1_t __ret; \
   7782   __ret = (uint64x1_t) __builtin_neon_vld1_v(__p0, 19); \
   7783   __ret; \
   7784 })
   7785 #endif
   7786 
   7787 #ifdef __LITTLE_ENDIAN__
   7788 #define vld1_u16(__p0) __extension__ ({ \
   7789   uint16x4_t __ret; \
   7790   __ret = (uint16x4_t) __builtin_neon_vld1_v(__p0, 17); \
   7791   __ret; \
   7792 })
   7793 #else
   7794 #define vld1_u16(__p0) __extension__ ({ \
   7795   uint16x4_t __ret; \
   7796   __ret = (uint16x4_t) __builtin_neon_vld1_v(__p0, 17); \
   7797   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   7798   __ret; \
   7799 })
   7800 #endif
   7801 
   7802 #ifdef __LITTLE_ENDIAN__
   7803 #define vld1_s8(__p0) __extension__ ({ \
   7804   int8x8_t __ret; \
   7805   __ret = (int8x8_t) __builtin_neon_vld1_v(__p0, 0); \
   7806   __ret; \
   7807 })
   7808 #else
   7809 #define vld1_s8(__p0) __extension__ ({ \
   7810   int8x8_t __ret; \
   7811   __ret = (int8x8_t) __builtin_neon_vld1_v(__p0, 0); \
   7812   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   7813   __ret; \
   7814 })
   7815 #endif
   7816 
   7817 #ifdef __LITTLE_ENDIAN__
   7818 #define vld1_f32(__p0) __extension__ ({ \
   7819   float32x2_t __ret; \
   7820   __ret = (float32x2_t) __builtin_neon_vld1_v(__p0, 9); \
   7821   __ret; \
   7822 })
   7823 #else
   7824 #define vld1_f32(__p0) __extension__ ({ \
   7825   float32x2_t __ret; \
   7826   __ret = (float32x2_t) __builtin_neon_vld1_v(__p0, 9); \
   7827   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   7828   __ret; \
   7829 })
   7830 #endif
   7831 
   7832 #ifdef __LITTLE_ENDIAN__
   7833 #define vld1_f16(__p0) __extension__ ({ \
   7834   float16x4_t __ret; \
   7835   __ret = (float16x4_t) __builtin_neon_vld1_v(__p0, 8); \
   7836   __ret; \
   7837 })
   7838 #else
   7839 #define vld1_f16(__p0) __extension__ ({ \
   7840   float16x4_t __ret; \
   7841   __ret = (float16x4_t) __builtin_neon_vld1_v(__p0, 8); \
   7842   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   7843   __ret; \
   7844 })
   7845 #endif
   7846 
   7847 #ifdef __LITTLE_ENDIAN__
   7848 #define vld1_s32(__p0) __extension__ ({ \
   7849   int32x2_t __ret; \
   7850   __ret = (int32x2_t) __builtin_neon_vld1_v(__p0, 2); \
   7851   __ret; \
   7852 })
   7853 #else
   7854 #define vld1_s32(__p0) __extension__ ({ \
   7855   int32x2_t __ret; \
   7856   __ret = (int32x2_t) __builtin_neon_vld1_v(__p0, 2); \
   7857   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   7858   __ret; \
   7859 })
   7860 #endif
   7861 
   7862 #ifdef __LITTLE_ENDIAN__
   7863 #define vld1_s64(__p0) __extension__ ({ \
   7864   int64x1_t __ret; \
   7865   __ret = (int64x1_t) __builtin_neon_vld1_v(__p0, 3); \
   7866   __ret; \
   7867 })
   7868 #else
   7869 #define vld1_s64(__p0) __extension__ ({ \
   7870   int64x1_t __ret; \
   7871   __ret = (int64x1_t) __builtin_neon_vld1_v(__p0, 3); \
   7872   __ret; \
   7873 })
   7874 #endif
   7875 
   7876 #ifdef __LITTLE_ENDIAN__
   7877 #define vld1_s16(__p0) __extension__ ({ \
   7878   int16x4_t __ret; \
   7879   __ret = (int16x4_t) __builtin_neon_vld1_v(__p0, 1); \
   7880   __ret; \
   7881 })
   7882 #else
   7883 #define vld1_s16(__p0) __extension__ ({ \
   7884   int16x4_t __ret; \
   7885   __ret = (int16x4_t) __builtin_neon_vld1_v(__p0, 1); \
   7886   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   7887   __ret; \
   7888 })
   7889 #endif
   7890 
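/* vld1_dup_* / vld1q_dup_*: load a single element from __p0 and replicate it
 * into every lane of the result vector. */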
   7891 #ifdef __LITTLE_ENDIAN__
   7892 #define vld1_dup_p8(__p0) __extension__ ({ \
   7893   poly8x8_t __ret; \
   7894   __ret = (poly8x8_t) __builtin_neon_vld1_dup_v(__p0, 4); \
   7895   __ret; \
   7896 })
   7897 #else
   7898 #define vld1_dup_p8(__p0) __extension__ ({ \
   7899   poly8x8_t __ret; \
   7900   __ret = (poly8x8_t) __builtin_neon_vld1_dup_v(__p0, 4); \
   7901   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   7902   __ret; \
   7903 })
   7904 #endif
   7905 
   7906 #ifdef __LITTLE_ENDIAN__
   7907 #define vld1_dup_p16(__p0) __extension__ ({ \
   7908   poly16x4_t __ret; \
   7909   __ret = (poly16x4_t) __builtin_neon_vld1_dup_v(__p0, 5); \
   7910   __ret; \
   7911 })
   7912 #else
   7913 #define vld1_dup_p16(__p0) __extension__ ({ \
   7914   poly16x4_t __ret; \
   7915   __ret = (poly16x4_t) __builtin_neon_vld1_dup_v(__p0, 5); \
   7916   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   7917   __ret; \
   7918 })
   7919 #endif
   7920 
   7921 #ifdef __LITTLE_ENDIAN__
   7922 #define vld1q_dup_p8(__p0) __extension__ ({ \
   7923   poly8x16_t __ret; \
   7924   __ret = (poly8x16_t) __builtin_neon_vld1q_dup_v(__p0, 36); \
   7925   __ret; \
   7926 })
   7927 #else
   7928 #define vld1q_dup_p8(__p0) __extension__ ({ \
   7929   poly8x16_t __ret; \
   7930   __ret = (poly8x16_t) __builtin_neon_vld1q_dup_v(__p0, 36); \
   7931   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   7932   __ret; \
   7933 })
   7934 #endif
   7935 
   7936 #ifdef __LITTLE_ENDIAN__
   7937 #define vld1q_dup_p16(__p0) __extension__ ({ \
   7938   poly16x8_t __ret; \
   7939   __ret = (poly16x8_t) __builtin_neon_vld1q_dup_v(__p0, 37); \
   7940   __ret; \
   7941 })
   7942 #else
   7943 #define vld1q_dup_p16(__p0) __extension__ ({ \
   7944   poly16x8_t __ret; \
   7945   __ret = (poly16x8_t) __builtin_neon_vld1q_dup_v(__p0, 37); \
   7946   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   7947   __ret; \
   7948 })
   7949 #endif
   7950 
   7951 #ifdef __LITTLE_ENDIAN__
   7952 #define vld1q_dup_u8(__p0) __extension__ ({ \
   7953   uint8x16_t __ret; \
   7954   __ret = (uint8x16_t) __builtin_neon_vld1q_dup_v(__p0, 48); \
   7955   __ret; \
   7956 })
   7957 #else
   7958 #define vld1q_dup_u8(__p0) __extension__ ({ \
   7959   uint8x16_t __ret; \
   7960   __ret = (uint8x16_t) __builtin_neon_vld1q_dup_v(__p0, 48); \
   7961   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   7962   __ret; \
   7963 })
   7964 #endif
   7965 
   7966 #ifdef __LITTLE_ENDIAN__
   7967 #define vld1q_dup_u32(__p0) __extension__ ({ \
   7968   uint32x4_t __ret; \
   7969   __ret = (uint32x4_t) __builtin_neon_vld1q_dup_v(__p0, 50); \
   7970   __ret; \
   7971 })
   7972 #else
   7973 #define vld1q_dup_u32(__p0) __extension__ ({ \
   7974   uint32x4_t __ret; \
   7975   __ret = (uint32x4_t) __builtin_neon_vld1q_dup_v(__p0, 50); \
   7976   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   7977   __ret; \
   7978 })
   7979 #endif
   7980 
   7981 #ifdef __LITTLE_ENDIAN__
   7982 #define vld1q_dup_u64(__p0) __extension__ ({ \
   7983   uint64x2_t __ret; \
   7984   __ret = (uint64x2_t) __builtin_neon_vld1q_dup_v(__p0, 51); \
   7985   __ret; \
   7986 })
   7987 #else
   7988 #define vld1q_dup_u64(__p0) __extension__ ({ \
   7989   uint64x2_t __ret; \
   7990   __ret = (uint64x2_t) __builtin_neon_vld1q_dup_v(__p0, 51); \
   7991   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   7992   __ret; \
   7993 })
   7994 #endif
   7995 
   7996 #ifdef __LITTLE_ENDIAN__
   7997 #define vld1q_dup_u16(__p0) __extension__ ({ \
   7998   uint16x8_t __ret; \
   7999   __ret = (uint16x8_t) __builtin_neon_vld1q_dup_v(__p0, 49); \
   8000   __ret; \
   8001 })
   8002 #else
   8003 #define vld1q_dup_u16(__p0) __extension__ ({ \
   8004   uint16x8_t __ret; \
   8005   __ret = (uint16x8_t) __builtin_neon_vld1q_dup_v(__p0, 49); \
   8006   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8007   __ret; \
   8008 })
   8009 #endif
   8010 
   8011 #ifdef __LITTLE_ENDIAN__
   8012 #define vld1q_dup_s8(__p0) __extension__ ({ \
   8013   int8x16_t __ret; \
   8014   __ret = (int8x16_t) __builtin_neon_vld1q_dup_v(__p0, 32); \
   8015   __ret; \
   8016 })
   8017 #else
   8018 #define vld1q_dup_s8(__p0) __extension__ ({ \
   8019   int8x16_t __ret; \
   8020   __ret = (int8x16_t) __builtin_neon_vld1q_dup_v(__p0, 32); \
   8021   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8022   __ret; \
   8023 })
   8024 #endif
   8025 
   8026 #ifdef __LITTLE_ENDIAN__
   8027 #define vld1q_dup_f32(__p0) __extension__ ({ \
   8028   float32x4_t __ret; \
   8029   __ret = (float32x4_t) __builtin_neon_vld1q_dup_v(__p0, 41); \
   8030   __ret; \
   8031 })
   8032 #else
   8033 #define vld1q_dup_f32(__p0) __extension__ ({ \
   8034   float32x4_t __ret; \
   8035   __ret = (float32x4_t) __builtin_neon_vld1q_dup_v(__p0, 41); \
   8036   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8037   __ret; \
   8038 })
   8039 #endif
   8040 
   8041 #ifdef __LITTLE_ENDIAN__
   8042 #define vld1q_dup_f16(__p0) __extension__ ({ \
   8043   float16x8_t __ret; \
   8044   __ret = (float16x8_t) __builtin_neon_vld1q_dup_v(__p0, 40); \
   8045   __ret; \
   8046 })
   8047 #else
   8048 #define vld1q_dup_f16(__p0) __extension__ ({ \
   8049   float16x8_t __ret; \
   8050   __ret = (float16x8_t) __builtin_neon_vld1q_dup_v(__p0, 40); \
   8051   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8052   __ret; \
   8053 })
   8054 #endif
   8055 
   8056 #ifdef __LITTLE_ENDIAN__
   8057 #define vld1q_dup_s32(__p0) __extension__ ({ \
   8058   int32x4_t __ret; \
   8059   __ret = (int32x4_t) __builtin_neon_vld1q_dup_v(__p0, 34); \
   8060   __ret; \
   8061 })
   8062 #else
   8063 #define vld1q_dup_s32(__p0) __extension__ ({ \
   8064   int32x4_t __ret; \
   8065   __ret = (int32x4_t) __builtin_neon_vld1q_dup_v(__p0, 34); \
   8066   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8067   __ret; \
   8068 })
   8069 #endif
   8070 
   8071 #ifdef __LITTLE_ENDIAN__
   8072 #define vld1q_dup_s64(__p0) __extension__ ({ \
   8073   int64x2_t __ret; \
   8074   __ret = (int64x2_t) __builtin_neon_vld1q_dup_v(__p0, 35); \
   8075   __ret; \
   8076 })
   8077 #else
   8078 #define vld1q_dup_s64(__p0) __extension__ ({ \
   8079   int64x2_t __ret; \
   8080   __ret = (int64x2_t) __builtin_neon_vld1q_dup_v(__p0, 35); \
   8081   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   8082   __ret; \
   8083 })
   8084 #endif
   8085 
   8086 #ifdef __LITTLE_ENDIAN__
   8087 #define vld1q_dup_s16(__p0) __extension__ ({ \
   8088   int16x8_t __ret; \
   8089   __ret = (int16x8_t) __builtin_neon_vld1q_dup_v(__p0, 33); \
   8090   __ret; \
   8091 })
   8092 #else
   8093 #define vld1q_dup_s16(__p0) __extension__ ({ \
   8094   int16x8_t __ret; \
   8095   __ret = (int16x8_t) __builtin_neon_vld1q_dup_v(__p0, 33); \
   8096   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8097   __ret; \
   8098 })
   8099 #endif
   8100 
   8101 #ifdef __LITTLE_ENDIAN__
   8102 #define vld1_dup_u8(__p0) __extension__ ({ \
   8103   uint8x8_t __ret; \
   8104   __ret = (uint8x8_t) __builtin_neon_vld1_dup_v(__p0, 16); \
   8105   __ret; \
   8106 })
   8107 #else
   8108 #define vld1_dup_u8(__p0) __extension__ ({ \
   8109   uint8x8_t __ret; \
   8110   __ret = (uint8x8_t) __builtin_neon_vld1_dup_v(__p0, 16); \
   8111   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8112   __ret; \
   8113 })
   8114 #endif
   8115 
   8116 #ifdef __LITTLE_ENDIAN__
   8117 #define vld1_dup_u32(__p0) __extension__ ({ \
   8118   uint32x2_t __ret; \
   8119   __ret = (uint32x2_t) __builtin_neon_vld1_dup_v(__p0, 18); \
   8120   __ret; \
   8121 })
   8122 #else
   8123 #define vld1_dup_u32(__p0) __extension__ ({ \
   8124   uint32x2_t __ret; \
   8125   __ret = (uint32x2_t) __builtin_neon_vld1_dup_v(__p0, 18); \
   8126   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   8127   __ret; \
   8128 })
   8129 #endif
   8130 
   8131 #ifdef __LITTLE_ENDIAN__
   8132 #define vld1_dup_u64(__p0) __extension__ ({ \
   8133   uint64x1_t __ret; \
   8134   __ret = (uint64x1_t) __builtin_neon_vld1_dup_v(__p0, 19); \
   8135   __ret; \
   8136 })
   8137 #else
   8138 #define vld1_dup_u64(__p0) __extension__ ({ \
   8139   uint64x1_t __ret; \
   8140   __ret = (uint64x1_t) __builtin_neon_vld1_dup_v(__p0, 19); \
   8141   __ret; \
   8142 })
   8143 #endif
   8144 
   8145 #ifdef __LITTLE_ENDIAN__
   8146 #define vld1_dup_u16(__p0) __extension__ ({ \
   8147   uint16x4_t __ret; \
   8148   __ret = (uint16x4_t) __builtin_neon_vld1_dup_v(__p0, 17); \
   8149   __ret; \
   8150 })
   8151 #else
   8152 #define vld1_dup_u16(__p0) __extension__ ({ \
   8153   uint16x4_t __ret; \
   8154   __ret = (uint16x4_t) __builtin_neon_vld1_dup_v(__p0, 17); \
   8155   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8156   __ret; \
   8157 })
   8158 #endif
   8159 
   8160 #ifdef __LITTLE_ENDIAN__
   8161 #define vld1_dup_s8(__p0) __extension__ ({ \
   8162   int8x8_t __ret; \
   8163   __ret = (int8x8_t) __builtin_neon_vld1_dup_v(__p0, 0); \
   8164   __ret; \
   8165 })
   8166 #else
   8167 #define vld1_dup_s8(__p0) __extension__ ({ \
   8168   int8x8_t __ret; \
   8169   __ret = (int8x8_t) __builtin_neon_vld1_dup_v(__p0, 0); \
   8170   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8171   __ret; \
   8172 })
   8173 #endif
   8174 
   8175 #ifdef __LITTLE_ENDIAN__
   8176 #define vld1_dup_f32(__p0) __extension__ ({ \
   8177   float32x2_t __ret; \
   8178   __ret = (float32x2_t) __builtin_neon_vld1_dup_v(__p0, 9); \
   8179   __ret; \
   8180 })
   8181 #else
   8182 #define vld1_dup_f32(__p0) __extension__ ({ \
   8183   float32x2_t __ret; \
   8184   __ret = (float32x2_t) __builtin_neon_vld1_dup_v(__p0, 9); \
   8185   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   8186   __ret; \
   8187 })
   8188 #endif
   8189 
   8190 #ifdef __LITTLE_ENDIAN__
   8191 #define vld1_dup_f16(__p0) __extension__ ({ \
   8192   float16x4_t __ret; \
   8193   __ret = (float16x4_t) __builtin_neon_vld1_dup_v(__p0, 8); \
   8194   __ret; \
   8195 })
   8196 #else
   8197 #define vld1_dup_f16(__p0) __extension__ ({ \
   8198   float16x4_t __ret; \
   8199   __ret = (float16x4_t) __builtin_neon_vld1_dup_v(__p0, 8); \
   8200   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8201   __ret; \
   8202 })
   8203 #endif
   8204 
   8205 #ifdef __LITTLE_ENDIAN__
   8206 #define vld1_dup_s32(__p0) __extension__ ({ \
   8207   int32x2_t __ret; \
   8208   __ret = (int32x2_t) __builtin_neon_vld1_dup_v(__p0, 2); \
   8209   __ret; \
   8210 })
   8211 #else
   8212 #define vld1_dup_s32(__p0) __extension__ ({ \
   8213   int32x2_t __ret; \
   8214   __ret = (int32x2_t) __builtin_neon_vld1_dup_v(__p0, 2); \
   8215   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   8216   __ret; \
   8217 })
   8218 #endif
   8219 
   8220 #ifdef __LITTLE_ENDIAN__
   8221 #define vld1_dup_s64(__p0) __extension__ ({ \
   8222   int64x1_t __ret; \
   8223   __ret = (int64x1_t) __builtin_neon_vld1_dup_v(__p0, 3); \
   8224   __ret; \
   8225 })
   8226 #else
   8227 #define vld1_dup_s64(__p0) __extension__ ({ \
   8228   int64x1_t __ret; \
   8229   __ret = (int64x1_t) __builtin_neon_vld1_dup_v(__p0, 3); \
   8230   __ret; \
   8231 })
   8232 #endif
   8233 
   8234 #ifdef __LITTLE_ENDIAN__
   8235 #define vld1_dup_s16(__p0) __extension__ ({ \
   8236   int16x4_t __ret; \
   8237   __ret = (int16x4_t) __builtin_neon_vld1_dup_v(__p0, 1); \
   8238   __ret; \
   8239 })
   8240 #else
   8241 #define vld1_dup_s16(__p0) __extension__ ({ \
   8242   int16x4_t __ret; \
   8243   __ret = (int16x4_t) __builtin_neon_vld1_dup_v(__p0, 1); \
   8244   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8245   __ret; \
   8246 })
   8247 #endif
   8248 
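/* vld1_lane_* / vld1q_lane_*: load one element from __p0 into lane __p2 of
 * the existing vector __p1, leaving all other lanes unchanged; __p2 must be
 * a constant lane index. */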
   8249 #ifdef __LITTLE_ENDIAN__
   8250 #define vld1_lane_p8(__p0, __p1, __p2) __extension__ ({ \
   8251   poly8x8_t __s1 = __p1; \
   8252   poly8x8_t __ret; \
   8253   __ret = (poly8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 4); \
   8254   __ret; \
   8255 })
   8256 #else
   8257 #define vld1_lane_p8(__p0, __p1, __p2) __extension__ ({ \
   8258   poly8x8_t __s1 = __p1; \
   8259   poly8x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   8260   poly8x8_t __ret; \
   8261   __ret = (poly8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 4); \
   8262   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8263   __ret; \
   8264 })
   8265 #endif
   8266 
   8267 #ifdef __LITTLE_ENDIAN__
   8268 #define vld1_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   8269   poly16x4_t __s1 = __p1; \
   8270   poly16x4_t __ret; \
   8271   __ret = (poly16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 5); \
   8272   __ret; \
   8273 })
   8274 #else
   8275 #define vld1_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   8276   poly16x4_t __s1 = __p1; \
   8277   poly16x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   8278   poly16x4_t __ret; \
   8279   __ret = (poly16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 5); \
   8280   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8281   __ret; \
   8282 })
   8283 #endif
   8284 
   8285 #ifdef __LITTLE_ENDIAN__
   8286 #define vld1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \
   8287   poly8x16_t __s1 = __p1; \
   8288   poly8x16_t __ret; \
   8289   __ret = (poly8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 36); \
   8290   __ret; \
   8291 })
   8292 #else
   8293 #define vld1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \
   8294   poly8x16_t __s1 = __p1; \
   8295   poly8x16_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8296   poly8x16_t __ret; \
   8297   __ret = (poly8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 36); \
   8298   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8299   __ret; \
   8300 })
   8301 #endif
   8302 
   8303 #ifdef __LITTLE_ENDIAN__
   8304 #define vld1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   8305   poly16x8_t __s1 = __p1; \
   8306   poly16x8_t __ret; \
   8307   __ret = (poly16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 37); \
   8308   __ret; \
   8309 })
   8310 #else
   8311 #define vld1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   8312   poly16x8_t __s1 = __p1; \
   8313   poly16x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   8314   poly16x8_t __ret; \
   8315   __ret = (poly16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 37); \
   8316   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8317   __ret; \
   8318 })
   8319 #endif
   8320 
   8321 #ifdef __LITTLE_ENDIAN__
   8322 #define vld1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \
   8323   uint8x16_t __s1 = __p1; \
   8324   uint8x16_t __ret; \
   8325   __ret = (uint8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 48); \
   8326   __ret; \
   8327 })
   8328 #else
   8329 #define vld1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \
   8330   uint8x16_t __s1 = __p1; \
   8331   uint8x16_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8332   uint8x16_t __ret; \
   8333   __ret = (uint8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 48); \
   8334   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8335   __ret; \
   8336 })
   8337 #endif
   8338 
   8339 #ifdef __LITTLE_ENDIAN__
   8340 #define vld1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \
   8341   uint32x4_t __s1 = __p1; \
   8342   uint32x4_t __ret; \
   8343   __ret = (uint32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 50); \
   8344   __ret; \
   8345 })
   8346 #else
   8347 #define vld1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \
   8348   uint32x4_t __s1 = __p1; \
   8349   uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   8350   uint32x4_t __ret; \
   8351   __ret = (uint32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 50); \
   8352   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8353   __ret; \
   8354 })
   8355 #endif
   8356 
   8357 #ifdef __LITTLE_ENDIAN__
   8358 #define vld1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \
   8359   uint64x2_t __s1 = __p1; \
   8360   uint64x2_t __ret; \
   8361   __ret = (uint64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 51); \
   8362   __ret; \
   8363 })
   8364 #else
   8365 #define vld1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \
   8366   uint64x2_t __s1 = __p1; \
   8367   uint64x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   8368   uint64x2_t __ret; \
   8369   __ret = (uint64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 51); \
   8370   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   8371   __ret; \
   8372 })
   8373 #endif
   8374 
   8375 #ifdef __LITTLE_ENDIAN__
   8376 #define vld1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \
   8377   uint16x8_t __s1 = __p1; \
   8378   uint16x8_t __ret; \
   8379   __ret = (uint16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 49); \
   8380   __ret; \
   8381 })
   8382 #else
   8383 #define vld1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \
   8384   uint16x8_t __s1 = __p1; \
   8385   uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   8386   uint16x8_t __ret; \
   8387   __ret = (uint16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 49); \
   8388   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8389   __ret; \
   8390 })
   8391 #endif
   8392 
   8393 #ifdef __LITTLE_ENDIAN__
   8394 #define vld1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \
   8395   int8x16_t __s1 = __p1; \
   8396   int8x16_t __ret; \
   8397   __ret = (int8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 32); \
   8398   __ret; \
   8399 })
   8400 #else
   8401 #define vld1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \
   8402   int8x16_t __s1 = __p1; \
   8403   int8x16_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8404   int8x16_t __ret; \
   8405   __ret = (int8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 32); \
   8406   __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8407   __ret; \
   8408 })
   8409 #endif
   8410 
   8411 #ifdef __LITTLE_ENDIAN__
   8412 #define vld1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \
   8413   float32x4_t __s1 = __p1; \
   8414   float32x4_t __ret; \
   8415   __ret = (float32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 41); \
   8416   __ret; \
   8417 })
   8418 #else
   8419 #define vld1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \
   8420   float32x4_t __s1 = __p1; \
   8421   float32x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   8422   float32x4_t __ret; \
   8423   __ret = (float32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 41); \
   8424   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8425   __ret; \
   8426 })
   8427 #endif
   8428 
   8429 #ifdef __LITTLE_ENDIAN__
   8430 #define vld1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \
   8431   float16x8_t __s1 = __p1; \
   8432   float16x8_t __ret; \
   8433   __ret = (float16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 40); \
   8434   __ret; \
   8435 })
   8436 #else
   8437 #define vld1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \
   8438   float16x8_t __s1 = __p1; \
   8439   float16x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   8440   float16x8_t __ret; \
   8441   __ret = (float16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 40); \
   8442   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8443   __ret; \
   8444 })
   8445 #endif
   8446 
   8447 #ifdef __LITTLE_ENDIAN__
   8448 #define vld1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \
   8449   int32x4_t __s1 = __p1; \
   8450   int32x4_t __ret; \
   8451   __ret = (int32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 34); \
   8452   __ret; \
   8453 })
   8454 #else
   8455 #define vld1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \
   8456   int32x4_t __s1 = __p1; \
   8457   int32x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   8458   int32x4_t __ret; \
   8459   __ret = (int32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 34); \
   8460   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8461   __ret; \
   8462 })
   8463 #endif
   8464 
   8465 #ifdef __LITTLE_ENDIAN__
   8466 #define vld1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \
   8467   int64x2_t __s1 = __p1; \
   8468   int64x2_t __ret; \
   8469   __ret = (int64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 35); \
   8470   __ret; \
   8471 })
   8472 #else
   8473 #define vld1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \
   8474   int64x2_t __s1 = __p1; \
   8475   int64x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   8476   int64x2_t __ret; \
   8477   __ret = (int64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 35); \
   8478   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   8479   __ret; \
   8480 })
   8481 #endif
   8482 
   8483 #ifdef __LITTLE_ENDIAN__
   8484 #define vld1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \
   8485   int16x8_t __s1 = __p1; \
   8486   int16x8_t __ret; \
   8487   __ret = (int16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 33); \
   8488   __ret; \
   8489 })
   8490 #else
   8491 #define vld1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \
   8492   int16x8_t __s1 = __p1; \
   8493   int16x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   8494   int16x8_t __ret; \
   8495   __ret = (int16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 33); \
   8496   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8497   __ret; \
   8498 })
   8499 #endif
   8500 
   8501 #ifdef __LITTLE_ENDIAN__
   8502 #define vld1_lane_u8(__p0, __p1, __p2) __extension__ ({ \
   8503   uint8x8_t __s1 = __p1; \
   8504   uint8x8_t __ret; \
   8505   __ret = (uint8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 16); \
   8506   __ret; \
   8507 })
   8508 #else
   8509 #define vld1_lane_u8(__p0, __p1, __p2) __extension__ ({ \
   8510   uint8x8_t __s1 = __p1; \
   8511   uint8x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   8512   uint8x8_t __ret; \
   8513   __ret = (uint8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 16); \
   8514   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8515   __ret; \
   8516 })
   8517 #endif
   8518 
   8519 #ifdef __LITTLE_ENDIAN__
   8520 #define vld1_lane_u32(__p0, __p1, __p2) __extension__ ({ \
   8521   uint32x2_t __s1 = __p1; \
   8522   uint32x2_t __ret; \
   8523   __ret = (uint32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 18); \
   8524   __ret; \
   8525 })
   8526 #else
   8527 #define vld1_lane_u32(__p0, __p1, __p2) __extension__ ({ \
   8528   uint32x2_t __s1 = __p1; \
   8529   uint32x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   8530   uint32x2_t __ret; \
   8531   __ret = (uint32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 18); \
   8532   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   8533   __ret; \
   8534 })
   8535 #endif
   8536 
   8537 #ifdef __LITTLE_ENDIAN__
   8538 #define vld1_lane_u64(__p0, __p1, __p2) __extension__ ({ \
   8539   uint64x1_t __s1 = __p1; \
   8540   uint64x1_t __ret; \
   8541   __ret = (uint64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \
   8542   __ret; \
   8543 })
   8544 #else
   8545 #define vld1_lane_u64(__p0, __p1, __p2) __extension__ ({ \
   8546   uint64x1_t __s1 = __p1; \
   8547   uint64x1_t __ret; \
   8548   __ret = (uint64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \
   8549   __ret; \
   8550 })
   8551 #endif
   8552 
   8553 #ifdef __LITTLE_ENDIAN__
   8554 #define vld1_lane_u16(__p0, __p1, __p2) __extension__ ({ \
   8555   uint16x4_t __s1 = __p1; \
   8556   uint16x4_t __ret; \
   8557   __ret = (uint16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 17); \
   8558   __ret; \
   8559 })
   8560 #else
   8561 #define vld1_lane_u16(__p0, __p1, __p2) __extension__ ({ \
   8562   uint16x4_t __s1 = __p1; \
   8563   uint16x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   8564   uint16x4_t __ret; \
   8565   __ret = (uint16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 17); \
   8566   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8567   __ret; \
   8568 })
   8569 #endif
   8570 
   8571 #ifdef __LITTLE_ENDIAN__
   8572 #define vld1_lane_s8(__p0, __p1, __p2) __extension__ ({ \
   8573   int8x8_t __s1 = __p1; \
   8574   int8x8_t __ret; \
   8575   __ret = (int8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 0); \
   8576   __ret; \
   8577 })
   8578 #else
   8579 #define vld1_lane_s8(__p0, __p1, __p2) __extension__ ({ \
   8580   int8x8_t __s1 = __p1; \
   8581   int8x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   8582   int8x8_t __ret; \
   8583   __ret = (int8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 0); \
   8584   __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \
   8585   __ret; \
   8586 })
   8587 #endif
   8588 
   8589 #ifdef __LITTLE_ENDIAN__
   8590 #define vld1_lane_f32(__p0, __p1, __p2) __extension__ ({ \
   8591   float32x2_t __s1 = __p1; \
   8592   float32x2_t __ret; \
   8593   __ret = (float32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 9); \
   8594   __ret; \
   8595 })
   8596 #else
   8597 #define vld1_lane_f32(__p0, __p1, __p2) __extension__ ({ \
   8598   float32x2_t __s1 = __p1; \
   8599   float32x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   8600   float32x2_t __ret; \
   8601   __ret = (float32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 9); \
   8602   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   8603   __ret; \
   8604 })
   8605 #endif
   8606 
   8607 #ifdef __LITTLE_ENDIAN__
   8608 #define vld1_lane_f16(__p0, __p1, __p2) __extension__ ({ \
   8609   float16x4_t __s1 = __p1; \
   8610   float16x4_t __ret; \
   8611   __ret = (float16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 8); \
   8612   __ret; \
   8613 })
   8614 #else
   8615 #define vld1_lane_f16(__p0, __p1, __p2) __extension__ ({ \
   8616   float16x4_t __s1 = __p1; \
   8617   float16x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   8618   float16x4_t __ret; \
   8619   __ret = (float16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 8); \
   8620   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8621   __ret; \
   8622 })
   8623 #endif
   8624 
   8625 #ifdef __LITTLE_ENDIAN__
   8626 #define vld1_lane_s32(__p0, __p1, __p2) __extension__ ({ \
   8627   int32x2_t __s1 = __p1; \
   8628   int32x2_t __ret; \
   8629   __ret = (int32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 2); \
   8630   __ret; \
   8631 })
   8632 #else
   8633 #define vld1_lane_s32(__p0, __p1, __p2) __extension__ ({ \
   8634   int32x2_t __s1 = __p1; \
   8635   int32x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
   8636   int32x2_t __ret; \
   8637   __ret = (int32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 2); \
   8638   __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
   8639   __ret; \
   8640 })
   8641 #endif
   8642 
   8643 #ifdef __LITTLE_ENDIAN__
   8644 #define vld1_lane_s64(__p0, __p1, __p2) __extension__ ({ \
   8645   int64x1_t __s1 = __p1; \
   8646   int64x1_t __ret; \
   8647   __ret = (int64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \
   8648   __ret; \
   8649 })
   8650 #else
   8651 #define vld1_lane_s64(__p0, __p1, __p2) __extension__ ({ \
   8652   int64x1_t __s1 = __p1; \
   8653   int64x1_t __ret; \
   8654   __ret = (int64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \
   8655   __ret; \
   8656 })
   8657 #endif
   8658 
   8659 #ifdef __LITTLE_ENDIAN__
   8660 #define vld1_lane_s16(__p0, __p1, __p2) __extension__ ({ \
   8661   int16x4_t __s1 = __p1; \
   8662   int16x4_t __ret; \
   8663   __ret = (int16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 1); \
   8664   __ret; \
   8665 })
   8666 #else
   8667 #define vld1_lane_s16(__p0, __p1, __p2) __extension__ ({ \
   8668   int16x4_t __s1 = __p1; \
   8669   int16x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   8670   int16x4_t __ret; \
   8671   __ret = (int16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 1); \
   8672   __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
   8673   __ret; \
   8674 })
   8675 #endif
   8676 
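/* vld2: load 2*N interleaved elements from __p0 and de-interleave them into
 * the two vectors of a <type>x<N>x2_t structure (val[0] receives elements
 * 0, 2, 4, ..., val[1] receives elements 1, 3, 5, ...).  The big-endian
 * variants differ only in the lane reversals applied to each result vector.
 *
 * Illustrative use (buffer name is a placeholder):
 *
 *   uint8x8x2_t planes = vld2_u8(interleaved);
 *   // planes.val[0] = interleaved[0], interleaved[2], ...
 *   // planes.val[1] = interleaved[1], interleaved[3], ...
 */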
   8677 #ifdef __LITTLE_ENDIAN__
   8678 #define vld2_p8(__p0) __extension__ ({ \
   8679   poly8x8x2_t __ret; \
   8680   __builtin_neon_vld2_v(&__ret, __p0, 4); \
   8681   __ret; \
   8682 })
   8683 #else
   8684 #define vld2_p8(__p0) __extension__ ({ \
   8685   poly8x8x2_t __ret; \
   8686   __builtin_neon_vld2_v(&__ret, __p0, 4); \
   8687  \
   8688   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   8689   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   8690   __ret; \
   8691 })
   8692 #endif
   8693 
   8694 #ifdef __LITTLE_ENDIAN__
   8695 #define vld2_p16(__p0) __extension__ ({ \
   8696   poly16x4x2_t __ret; \
   8697   __builtin_neon_vld2_v(&__ret, __p0, 5); \
   8698   __ret; \
   8699 })
   8700 #else
   8701 #define vld2_p16(__p0) __extension__ ({ \
   8702   poly16x4x2_t __ret; \
   8703   __builtin_neon_vld2_v(&__ret, __p0, 5); \
   8704  \
   8705   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   8706   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   8707   __ret; \
   8708 })
   8709 #endif
   8710 
   8711 #ifdef __LITTLE_ENDIAN__
   8712 #define vld2q_p8(__p0) __extension__ ({ \
   8713   poly8x16x2_t __ret; \
   8714   __builtin_neon_vld2q_v(&__ret, __p0, 36); \
   8715   __ret; \
   8716 })
   8717 #else
   8718 #define vld2q_p8(__p0) __extension__ ({ \
   8719   poly8x16x2_t __ret; \
   8720   __builtin_neon_vld2q_v(&__ret, __p0, 36); \
   8721  \
   8722   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8723   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8724   __ret; \
   8725 })
   8726 #endif
   8727 
   8728 #ifdef __LITTLE_ENDIAN__
   8729 #define vld2q_p16(__p0) __extension__ ({ \
   8730   poly16x8x2_t __ret; \
   8731   __builtin_neon_vld2q_v(&__ret, __p0, 37); \
   8732   __ret; \
   8733 })
   8734 #else
   8735 #define vld2q_p16(__p0) __extension__ ({ \
   8736   poly16x8x2_t __ret; \
   8737   __builtin_neon_vld2q_v(&__ret, __p0, 37); \
   8738  \
   8739   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   8740   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   8741   __ret; \
   8742 })
   8743 #endif
   8744 
   8745 #ifdef __LITTLE_ENDIAN__
   8746 #define vld2q_u8(__p0) __extension__ ({ \
   8747   uint8x16x2_t __ret; \
   8748   __builtin_neon_vld2q_v(&__ret, __p0, 48); \
   8749   __ret; \
   8750 })
   8751 #else
   8752 #define vld2q_u8(__p0) __extension__ ({ \
   8753   uint8x16x2_t __ret; \
   8754   __builtin_neon_vld2q_v(&__ret, __p0, 48); \
   8755  \
   8756   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8757   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8758   __ret; \
   8759 })
   8760 #endif
   8761 
   8762 #ifdef __LITTLE_ENDIAN__
   8763 #define vld2q_u32(__p0) __extension__ ({ \
   8764   uint32x4x2_t __ret; \
   8765   __builtin_neon_vld2q_v(&__ret, __p0, 50); \
   8766   __ret; \
   8767 })
   8768 #else
   8769 #define vld2q_u32(__p0) __extension__ ({ \
   8770   uint32x4x2_t __ret; \
   8771   __builtin_neon_vld2q_v(&__ret, __p0, 50); \
   8772  \
   8773   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   8774   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   8775   __ret; \
   8776 })
   8777 #endif
   8778 
   8779 #ifdef __LITTLE_ENDIAN__
   8780 #define vld2q_u16(__p0) __extension__ ({ \
   8781   uint16x8x2_t __ret; \
   8782   __builtin_neon_vld2q_v(&__ret, __p0, 49); \
   8783   __ret; \
   8784 })
   8785 #else
   8786 #define vld2q_u16(__p0) __extension__ ({ \
   8787   uint16x8x2_t __ret; \
   8788   __builtin_neon_vld2q_v(&__ret, __p0, 49); \
   8789  \
   8790   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   8791   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   8792   __ret; \
   8793 })
   8794 #endif
   8795 
   8796 #ifdef __LITTLE_ENDIAN__
   8797 #define vld2q_s8(__p0) __extension__ ({ \
   8798   int8x16x2_t __ret; \
   8799   __builtin_neon_vld2q_v(&__ret, __p0, 32); \
   8800   __ret; \
   8801 })
   8802 #else
   8803 #define vld2q_s8(__p0) __extension__ ({ \
   8804   int8x16x2_t __ret; \
   8805   __builtin_neon_vld2q_v(&__ret, __p0, 32); \
   8806  \
   8807   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8808   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   8809   __ret; \
   8810 })
   8811 #endif
   8812 
   8813 #ifdef __LITTLE_ENDIAN__
   8814 #define vld2q_f32(__p0) __extension__ ({ \
   8815   float32x4x2_t __ret; \
   8816   __builtin_neon_vld2q_v(&__ret, __p0, 41); \
   8817   __ret; \
   8818 })
   8819 #else
   8820 #define vld2q_f32(__p0) __extension__ ({ \
   8821   float32x4x2_t __ret; \
   8822   __builtin_neon_vld2q_v(&__ret, __p0, 41); \
   8823  \
   8824   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   8825   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   8826   __ret; \
   8827 })
   8828 #endif
   8829 
   8830 #ifdef __LITTLE_ENDIAN__
   8831 #define vld2q_f16(__p0) __extension__ ({ \
   8832   float16x8x2_t __ret; \
   8833   __builtin_neon_vld2q_v(&__ret, __p0, 40); \
   8834   __ret; \
   8835 })
   8836 #else
   8837 #define vld2q_f16(__p0) __extension__ ({ \
   8838   float16x8x2_t __ret; \
   8839   __builtin_neon_vld2q_v(&__ret, __p0, 40); \
   8840  \
   8841   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   8842   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   8843   __ret; \
   8844 })
   8845 #endif
   8846 
   8847 #ifdef __LITTLE_ENDIAN__
   8848 #define vld2q_s32(__p0) __extension__ ({ \
   8849   int32x4x2_t __ret; \
   8850   __builtin_neon_vld2q_v(&__ret, __p0, 34); \
   8851   __ret; \
   8852 })
   8853 #else
   8854 #define vld2q_s32(__p0) __extension__ ({ \
   8855   int32x4x2_t __ret; \
   8856   __builtin_neon_vld2q_v(&__ret, __p0, 34); \
   8857  \
   8858   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   8859   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   8860   __ret; \
   8861 })
   8862 #endif
   8863 
   8864 #ifdef __LITTLE_ENDIAN__
   8865 #define vld2q_s16(__p0) __extension__ ({ \
   8866   int16x8x2_t __ret; \
   8867   __builtin_neon_vld2q_v(&__ret, __p0, 33); \
   8868   __ret; \
   8869 })
   8870 #else
   8871 #define vld2q_s16(__p0) __extension__ ({ \
   8872   int16x8x2_t __ret; \
   8873   __builtin_neon_vld2q_v(&__ret, __p0, 33); \
   8874  \
   8875   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   8876   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   8877   __ret; \
   8878 })
   8879 #endif
   8880 
   8881 #ifdef __LITTLE_ENDIAN__
   8882 #define vld2_u8(__p0) __extension__ ({ \
   8883   uint8x8x2_t __ret; \
   8884   __builtin_neon_vld2_v(&__ret, __p0, 16); \
   8885   __ret; \
   8886 })
   8887 #else
   8888 #define vld2_u8(__p0) __extension__ ({ \
   8889   uint8x8x2_t __ret; \
   8890   __builtin_neon_vld2_v(&__ret, __p0, 16); \
   8891  \
   8892   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   8893   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   8894   __ret; \
   8895 })
   8896 #endif
   8897 
   8898 #ifdef __LITTLE_ENDIAN__
   8899 #define vld2_u32(__p0) __extension__ ({ \
   8900   uint32x2x2_t __ret; \
   8901   __builtin_neon_vld2_v(&__ret, __p0, 18); \
   8902   __ret; \
   8903 })
   8904 #else
   8905 #define vld2_u32(__p0) __extension__ ({ \
   8906   uint32x2x2_t __ret; \
   8907   __builtin_neon_vld2_v(&__ret, __p0, 18); \
   8908  \
   8909   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   8910   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   8911   __ret; \
   8912 })
   8913 #endif
   8914 
   8915 #ifdef __LITTLE_ENDIAN__
   8916 #define vld2_u64(__p0) __extension__ ({ \
   8917   uint64x1x2_t __ret; \
   8918   __builtin_neon_vld2_v(&__ret, __p0, 19); \
   8919   __ret; \
   8920 })
   8921 #else
   8922 #define vld2_u64(__p0) __extension__ ({ \
   8923   uint64x1x2_t __ret; \
   8924   __builtin_neon_vld2_v(&__ret, __p0, 19); \
   8925   __ret; \
   8926 })
   8927 #endif
   8928 
   8929 #ifdef __LITTLE_ENDIAN__
   8930 #define vld2_u16(__p0) __extension__ ({ \
   8931   uint16x4x2_t __ret; \
   8932   __builtin_neon_vld2_v(&__ret, __p0, 17); \
   8933   __ret; \
   8934 })
   8935 #else
   8936 #define vld2_u16(__p0) __extension__ ({ \
   8937   uint16x4x2_t __ret; \
   8938   __builtin_neon_vld2_v(&__ret, __p0, 17); \
   8939  \
   8940   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   8941   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   8942   __ret; \
   8943 })
   8944 #endif
   8945 
   8946 #ifdef __LITTLE_ENDIAN__
   8947 #define vld2_s8(__p0) __extension__ ({ \
   8948   int8x8x2_t __ret; \
   8949   __builtin_neon_vld2_v(&__ret, __p0, 0); \
   8950   __ret; \
   8951 })
   8952 #else
   8953 #define vld2_s8(__p0) __extension__ ({ \
   8954   int8x8x2_t __ret; \
   8955   __builtin_neon_vld2_v(&__ret, __p0, 0); \
   8956  \
   8957   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   8958   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   8959   __ret; \
   8960 })
   8961 #endif
   8962 
   8963 #ifdef __LITTLE_ENDIAN__
   8964 #define vld2_f32(__p0) __extension__ ({ \
   8965   float32x2x2_t __ret; \
   8966   __builtin_neon_vld2_v(&__ret, __p0, 9); \
   8967   __ret; \
   8968 })
   8969 #else
   8970 #define vld2_f32(__p0) __extension__ ({ \
   8971   float32x2x2_t __ret; \
   8972   __builtin_neon_vld2_v(&__ret, __p0, 9); \
   8973  \
   8974   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   8975   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   8976   __ret; \
   8977 })
   8978 #endif
   8979 
   8980 #ifdef __LITTLE_ENDIAN__
   8981 #define vld2_f16(__p0) __extension__ ({ \
   8982   float16x4x2_t __ret; \
   8983   __builtin_neon_vld2_v(&__ret, __p0, 8); \
   8984   __ret; \
   8985 })
   8986 #else
   8987 #define vld2_f16(__p0) __extension__ ({ \
   8988   float16x4x2_t __ret; \
   8989   __builtin_neon_vld2_v(&__ret, __p0, 8); \
   8990  \
   8991   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   8992   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   8993   __ret; \
   8994 })
   8995 #endif
   8996 
   8997 #ifdef __LITTLE_ENDIAN__
   8998 #define vld2_s32(__p0) __extension__ ({ \
   8999   int32x2x2_t __ret; \
   9000   __builtin_neon_vld2_v(&__ret, __p0, 2); \
   9001   __ret; \
   9002 })
   9003 #else
   9004 #define vld2_s32(__p0) __extension__ ({ \
   9005   int32x2x2_t __ret; \
   9006   __builtin_neon_vld2_v(&__ret, __p0, 2); \
   9007  \
   9008   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   9009   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   9010   __ret; \
   9011 })
   9012 #endif
   9013 
   9014 #ifdef __LITTLE_ENDIAN__
   9015 #define vld2_s64(__p0) __extension__ ({ \
   9016   int64x1x2_t __ret; \
   9017   __builtin_neon_vld2_v(&__ret, __p0, 3); \
   9018   __ret; \
   9019 })
   9020 #else
   9021 #define vld2_s64(__p0) __extension__ ({ \
   9022   int64x1x2_t __ret; \
   9023   __builtin_neon_vld2_v(&__ret, __p0, 3); \
   9024   __ret; \
   9025 })
   9026 #endif
   9027 
   9028 #ifdef __LITTLE_ENDIAN__
   9029 #define vld2_s16(__p0) __extension__ ({ \
   9030   int16x4x2_t __ret; \
   9031   __builtin_neon_vld2_v(&__ret, __p0, 1); \
   9032   __ret; \
   9033 })
   9034 #else
   9035 #define vld2_s16(__p0) __extension__ ({ \
   9036   int16x4x2_t __ret; \
   9037   __builtin_neon_vld2_v(&__ret, __p0, 1); \
   9038  \
   9039   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9040   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9041   __ret; \
   9042 })
   9043 #endif
   9044 
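/* vld2_dup: load two consecutive elements from __p0 and broadcast the first
 * across every lane of val[0] and the second across every lane of val[1].
 *
 * Illustrative use (pointer name is a placeholder):
 *
 *   uint32x2x2_t pair = vld2_dup_u32(p);   // pair.val[0] = {p[0], p[0]},
 *                                          // pair.val[1] = {p[1], p[1]}
 */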
   9045 #ifdef __LITTLE_ENDIAN__
   9046 #define vld2_dup_p8(__p0) __extension__ ({ \
   9047   poly8x8x2_t __ret; \
   9048   __builtin_neon_vld2_dup_v(&__ret, __p0, 4); \
   9049   __ret; \
   9050 })
   9051 #else
   9052 #define vld2_dup_p8(__p0) __extension__ ({ \
   9053   poly8x8x2_t __ret; \
   9054   __builtin_neon_vld2_dup_v(&__ret, __p0, 4); \
   9055  \
   9056   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9057   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9058   __ret; \
   9059 })
   9060 #endif
   9061 
   9062 #ifdef __LITTLE_ENDIAN__
   9063 #define vld2_dup_p16(__p0) __extension__ ({ \
   9064   poly16x4x2_t __ret; \
   9065   __builtin_neon_vld2_dup_v(&__ret, __p0, 5); \
   9066   __ret; \
   9067 })
   9068 #else
   9069 #define vld2_dup_p16(__p0) __extension__ ({ \
   9070   poly16x4x2_t __ret; \
   9071   __builtin_neon_vld2_dup_v(&__ret, __p0, 5); \
   9072  \
   9073   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9074   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9075   __ret; \
   9076 })
   9077 #endif
   9078 
   9079 #ifdef __LITTLE_ENDIAN__
   9080 #define vld2_dup_u8(__p0) __extension__ ({ \
   9081   uint8x8x2_t __ret; \
   9082   __builtin_neon_vld2_dup_v(&__ret, __p0, 16); \
   9083   __ret; \
   9084 })
   9085 #else
   9086 #define vld2_dup_u8(__p0) __extension__ ({ \
   9087   uint8x8x2_t __ret; \
   9088   __builtin_neon_vld2_dup_v(&__ret, __p0, 16); \
   9089  \
   9090   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9091   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9092   __ret; \
   9093 })
   9094 #endif
   9095 
   9096 #ifdef __LITTLE_ENDIAN__
   9097 #define vld2_dup_u32(__p0) __extension__ ({ \
   9098   uint32x2x2_t __ret; \
   9099   __builtin_neon_vld2_dup_v(&__ret, __p0, 18); \
   9100   __ret; \
   9101 })
   9102 #else
   9103 #define vld2_dup_u32(__p0) __extension__ ({ \
   9104   uint32x2x2_t __ret; \
   9105   __builtin_neon_vld2_dup_v(&__ret, __p0, 18); \
   9106  \
   9107   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   9108   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   9109   __ret; \
   9110 })
   9111 #endif
   9112 
   9113 #ifdef __LITTLE_ENDIAN__
   9114 #define vld2_dup_u64(__p0) __extension__ ({ \
   9115   uint64x1x2_t __ret; \
   9116   __builtin_neon_vld2_dup_v(&__ret, __p0, 19); \
   9117   __ret; \
   9118 })
   9119 #else
   9120 #define vld2_dup_u64(__p0) __extension__ ({ \
   9121   uint64x1x2_t __ret; \
   9122   __builtin_neon_vld2_dup_v(&__ret, __p0, 19); \
   9123   __ret; \
   9124 })
   9125 #endif
   9126 
   9127 #ifdef __LITTLE_ENDIAN__
   9128 #define vld2_dup_u16(__p0) __extension__ ({ \
   9129   uint16x4x2_t __ret; \
   9130   __builtin_neon_vld2_dup_v(&__ret, __p0, 17); \
   9131   __ret; \
   9132 })
   9133 #else
   9134 #define vld2_dup_u16(__p0) __extension__ ({ \
   9135   uint16x4x2_t __ret; \
   9136   __builtin_neon_vld2_dup_v(&__ret, __p0, 17); \
   9137  \
   9138   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9139   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9140   __ret; \
   9141 })
   9142 #endif
   9143 
   9144 #ifdef __LITTLE_ENDIAN__
   9145 #define vld2_dup_s8(__p0) __extension__ ({ \
   9146   int8x8x2_t __ret; \
   9147   __builtin_neon_vld2_dup_v(&__ret, __p0, 0); \
   9148   __ret; \
   9149 })
   9150 #else
   9151 #define vld2_dup_s8(__p0) __extension__ ({ \
   9152   int8x8x2_t __ret; \
   9153   __builtin_neon_vld2_dup_v(&__ret, __p0, 0); \
   9154  \
   9155   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9156   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9157   __ret; \
   9158 })
   9159 #endif
   9160 
   9161 #ifdef __LITTLE_ENDIAN__
   9162 #define vld2_dup_f32(__p0) __extension__ ({ \
   9163   float32x2x2_t __ret; \
   9164   __builtin_neon_vld2_dup_v(&__ret, __p0, 9); \
   9165   __ret; \
   9166 })
   9167 #else
   9168 #define vld2_dup_f32(__p0) __extension__ ({ \
   9169   float32x2x2_t __ret; \
   9170   __builtin_neon_vld2_dup_v(&__ret, __p0, 9); \
   9171  \
   9172   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   9173   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   9174   __ret; \
   9175 })
   9176 #endif
   9177 
   9178 #ifdef __LITTLE_ENDIAN__
   9179 #define vld2_dup_f16(__p0) __extension__ ({ \
   9180   float16x4x2_t __ret; \
   9181   __builtin_neon_vld2_dup_v(&__ret, __p0, 8); \
   9182   __ret; \
   9183 })
   9184 #else
   9185 #define vld2_dup_f16(__p0) __extension__ ({ \
   9186   float16x4x2_t __ret; \
   9187   __builtin_neon_vld2_dup_v(&__ret, __p0, 8); \
   9188  \
   9189   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9190   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9191   __ret; \
   9192 })
   9193 #endif
   9194 
   9195 #ifdef __LITTLE_ENDIAN__
   9196 #define vld2_dup_s32(__p0) __extension__ ({ \
   9197   int32x2x2_t __ret; \
   9198   __builtin_neon_vld2_dup_v(&__ret, __p0, 2); \
   9199   __ret; \
   9200 })
   9201 #else
   9202 #define vld2_dup_s32(__p0) __extension__ ({ \
   9203   int32x2x2_t __ret; \
   9204   __builtin_neon_vld2_dup_v(&__ret, __p0, 2); \
   9205  \
   9206   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   9207   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   9208   __ret; \
   9209 })
   9210 #endif
   9211 
   9212 #ifdef __LITTLE_ENDIAN__
   9213 #define vld2_dup_s64(__p0) __extension__ ({ \
   9214   int64x1x2_t __ret; \
   9215   __builtin_neon_vld2_dup_v(&__ret, __p0, 3); \
   9216   __ret; \
   9217 })
   9218 #else
   9219 #define vld2_dup_s64(__p0) __extension__ ({ \
   9220   int64x1x2_t __ret; \
   9221   __builtin_neon_vld2_dup_v(&__ret, __p0, 3); \
   9222   __ret; \
   9223 })
   9224 #endif
   9225 
   9226 #ifdef __LITTLE_ENDIAN__
   9227 #define vld2_dup_s16(__p0) __extension__ ({ \
   9228   int16x4x2_t __ret; \
   9229   __builtin_neon_vld2_dup_v(&__ret, __p0, 1); \
   9230   __ret; \
   9231 })
   9232 #else
   9233 #define vld2_dup_s16(__p0) __extension__ ({ \
   9234   int16x4x2_t __ret; \
   9235   __builtin_neon_vld2_dup_v(&__ret, __p0, 1); \
   9236  \
   9237   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9238   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9239   __ret; \
   9240 })
   9241 #endif
   9242 
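/* vld2_lane: load two consecutive elements from __p0 into lane __p2 of
 * __p1.val[0] and __p1.val[1] respectively; the remaining lanes are passed
 * through from __p1.  __p2 must be a constant lane index.
 *
 * Illustrative use (names are placeholders; acc is assumed initialized):
 *
 *   acc = vld2_lane_u16(p, acc, 1);   // updates lane 1 of acc.val[0]
 *                                     // and acc.val[1] from p[0], p[1]
 */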
   9243 #ifdef __LITTLE_ENDIAN__
   9244 #define vld2_lane_p8(__p0, __p1, __p2) __extension__ ({ \
   9245   poly8x8x2_t __s1 = __p1; \
   9246   poly8x8x2_t __ret; \
   9247   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 4); \
   9248   __ret; \
   9249 })
   9250 #else
   9251 #define vld2_lane_p8(__p0, __p1, __p2) __extension__ ({ \
   9252   poly8x8x2_t __s1 = __p1; \
   9253   poly8x8x2_t __rev1; \
   9254   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9255   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9256   poly8x8x2_t __ret; \
   9257   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 4); \
   9258  \
   9259   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9260   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9261   __ret; \
   9262 })
   9263 #endif
   9264 
   9265 #ifdef __LITTLE_ENDIAN__
   9266 #define vld2_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   9267   poly16x4x2_t __s1 = __p1; \
   9268   poly16x4x2_t __ret; \
   9269   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 5); \
   9270   __ret; \
   9271 })
   9272 #else
   9273 #define vld2_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   9274   poly16x4x2_t __s1 = __p1; \
   9275   poly16x4x2_t __rev1; \
   9276   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
   9277   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
   9278   poly16x4x2_t __ret; \
   9279   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 5); \
   9280  \
   9281   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9282   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9283   __ret; \
   9284 })
   9285 #endif
   9286 
   9287 #ifdef __LITTLE_ENDIAN__
   9288 #define vld2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   9289   poly16x8x2_t __s1 = __p1; \
   9290   poly16x8x2_t __ret; \
   9291   __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 37); \
   9292   __ret; \
   9293 })
   9294 #else
   9295 #define vld2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   9296   poly16x8x2_t __s1 = __p1; \
   9297   poly16x8x2_t __rev1; \
   9298   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9299   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9300   poly16x8x2_t __ret; \
   9301   __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 37); \
   9302  \
   9303   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9304   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9305   __ret; \
   9306 })
   9307 #endif
   9308 
   9309 #ifdef __LITTLE_ENDIAN__
   9310 #define vld2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \
   9311   uint32x4x2_t __s1 = __p1; \
   9312   uint32x4x2_t __ret; \
   9313   __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 50); \
   9314   __ret; \
   9315 })
   9316 #else
   9317 #define vld2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \
   9318   uint32x4x2_t __s1 = __p1; \
   9319   uint32x4x2_t __rev1; \
   9320   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
   9321   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
   9322   uint32x4x2_t __ret; \
   9323   __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 50); \
   9324  \
   9325   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9326   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9327   __ret; \
   9328 })
   9329 #endif
   9330 
   9331 #ifdef __LITTLE_ENDIAN__
   9332 #define vld2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \
   9333   uint16x8x2_t __s1 = __p1; \
   9334   uint16x8x2_t __ret; \
   9335   __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 49); \
   9336   __ret; \
   9337 })
   9338 #else
   9339 #define vld2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \
   9340   uint16x8x2_t __s1 = __p1; \
   9341   uint16x8x2_t __rev1; \
   9342   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9343   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9344   uint16x8x2_t __ret; \
   9345   __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 49); \
   9346  \
   9347   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9348   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9349   __ret; \
   9350 })
   9351 #endif
   9352 
   9353 #ifdef __LITTLE_ENDIAN__
   9354 #define vld2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \
   9355   float32x4x2_t __s1 = __p1; \
   9356   float32x4x2_t __ret; \
   9357   __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 41); \
   9358   __ret; \
   9359 })
   9360 #else
   9361 #define vld2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \
   9362   float32x4x2_t __s1 = __p1; \
   9363   float32x4x2_t __rev1; \
   9364   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
   9365   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
   9366   float32x4x2_t __ret; \
   9367   __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 41); \
   9368  \
   9369   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9370   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9371   __ret; \
   9372 })
   9373 #endif
   9374 
   9375 #ifdef __LITTLE_ENDIAN__
   9376 #define vld2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \
   9377   float16x8x2_t __s1 = __p1; \
   9378   float16x8x2_t __ret; \
   9379   __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 40); \
   9380   __ret; \
   9381 })
   9382 #else
   9383 #define vld2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \
   9384   float16x8x2_t __s1 = __p1; \
   9385   float16x8x2_t __rev1; \
   9386   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9387   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9388   float16x8x2_t __ret; \
   9389   __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 40); \
   9390  \
   9391   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9392   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9393   __ret; \
   9394 })
   9395 #endif
   9396 
   9397 #ifdef __LITTLE_ENDIAN__
   9398 #define vld2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \
   9399   int32x4x2_t __s1 = __p1; \
   9400   int32x4x2_t __ret; \
   9401   __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 34); \
   9402   __ret; \
   9403 })
   9404 #else
   9405 #define vld2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \
   9406   int32x4x2_t __s1 = __p1; \
   9407   int32x4x2_t __rev1; \
   9408   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
   9409   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
   9410   int32x4x2_t __ret; \
   9411   __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 34); \
   9412  \
   9413   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9414   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9415   __ret; \
   9416 })
   9417 #endif
   9418 
   9419 #ifdef __LITTLE_ENDIAN__
   9420 #define vld2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \
   9421   int16x8x2_t __s1 = __p1; \
   9422   int16x8x2_t __ret; \
   9423   __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 33); \
   9424   __ret; \
   9425 })
   9426 #else
   9427 #define vld2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \
   9428   int16x8x2_t __s1 = __p1; \
   9429   int16x8x2_t __rev1; \
   9430   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9431   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9432   int16x8x2_t __ret; \
   9433   __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 33); \
   9434  \
   9435   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9436   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9437   __ret; \
   9438 })
   9439 #endif
   9440 
   9441 #ifdef __LITTLE_ENDIAN__
   9442 #define vld2_lane_u8(__p0, __p1, __p2) __extension__ ({ \
   9443   uint8x8x2_t __s1 = __p1; \
   9444   uint8x8x2_t __ret; \
   9445   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 16); \
   9446   __ret; \
   9447 })
   9448 #else
   9449 #define vld2_lane_u8(__p0, __p1, __p2) __extension__ ({ \
   9450   uint8x8x2_t __s1 = __p1; \
   9451   uint8x8x2_t __rev1; \
   9452   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9453   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9454   uint8x8x2_t __ret; \
   9455   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 16); \
   9456  \
   9457   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9458   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9459   __ret; \
   9460 })
   9461 #endif
   9462 
   9463 #ifdef __LITTLE_ENDIAN__
   9464 #define vld2_lane_u32(__p0, __p1, __p2) __extension__ ({ \
   9465   uint32x2x2_t __s1 = __p1; \
   9466   uint32x2x2_t __ret; \
   9467   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 18); \
   9468   __ret; \
   9469 })
   9470 #else
   9471 #define vld2_lane_u32(__p0, __p1, __p2) __extension__ ({ \
   9472   uint32x2x2_t __s1 = __p1; \
   9473   uint32x2x2_t __rev1; \
   9474   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
   9475   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
   9476   uint32x2x2_t __ret; \
   9477   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 18); \
   9478  \
   9479   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   9480   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   9481   __ret; \
   9482 })
   9483 #endif
   9484 
   9485 #ifdef __LITTLE_ENDIAN__
   9486 #define vld2_lane_u16(__p0, __p1, __p2) __extension__ ({ \
   9487   uint16x4x2_t __s1 = __p1; \
   9488   uint16x4x2_t __ret; \
   9489   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 17); \
   9490   __ret; \
   9491 })
   9492 #else
   9493 #define vld2_lane_u16(__p0, __p1, __p2) __extension__ ({ \
   9494   uint16x4x2_t __s1 = __p1; \
   9495   uint16x4x2_t __rev1; \
   9496   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
   9497   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
   9498   uint16x4x2_t __ret; \
   9499   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 17); \
   9500  \
   9501   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9502   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9503   __ret; \
   9504 })
   9505 #endif
   9506 
   9507 #ifdef __LITTLE_ENDIAN__
   9508 #define vld2_lane_s8(__p0, __p1, __p2) __extension__ ({ \
   9509   int8x8x2_t __s1 = __p1; \
   9510   int8x8x2_t __ret; \
   9511   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 0); \
   9512   __ret; \
   9513 })
   9514 #else
   9515 #define vld2_lane_s8(__p0, __p1, __p2) __extension__ ({ \
   9516   int8x8x2_t __s1 = __p1; \
   9517   int8x8x2_t __rev1; \
   9518   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9519   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9520   int8x8x2_t __ret; \
   9521   __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 0); \
   9522  \
   9523   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9524   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9525   __ret; \
   9526 })
   9527 #endif
   9528 
   9529 #ifdef __LITTLE_ENDIAN__
   9530 #define vld2_lane_f32(__p0, __p1, __p2) __extension__ ({ \
   9531   float32x2x2_t __s1 = __p1; \
   9532   float32x2x2_t __ret; \
   9533   __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 9); \
   9534   __ret; \
   9535 })
   9536 #else
   9537 #define vld2_lane_f32(__p0, __p1, __p2) __extension__ ({ \
   9538   float32x2x2_t __s1 = __p1; \
   9539   float32x2x2_t __rev1; \
   9540   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
   9541   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
   9542   float32x2x2_t __ret; \
   9543   __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 9); \
   9544  \
   9545   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   9546   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   9547   __ret; \
   9548 })
   9549 #endif
   9550 
   9551 #ifdef __LITTLE_ENDIAN__
   9552 #define vld2_lane_f16(__p0, __p1, __p2) __extension__ ({ \
   9553   float16x4x2_t __s1 = __p1; \
   9554   float16x4x2_t __ret; \
   9555   __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 8); \
   9556   __ret; \
   9557 })
   9558 #else
   9559 #define vld2_lane_f16(__p0, __p1, __p2) __extension__ ({ \
   9560   float16x4x2_t __s1 = __p1; \
   9561   float16x4x2_t __rev1; \
   9562   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
   9563   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
   9564   float16x4x2_t __ret; \
   9565   __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 8); \
   9566  \
   9567   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9568   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9569   __ret; \
   9570 })
   9571 #endif
   9572 
   9573 #ifdef __LITTLE_ENDIAN__
   9574 #define vld2_lane_s32(__p0, __p1, __p2) __extension__ ({ \
   9575   int32x2x2_t __s1 = __p1; \
   9576   int32x2x2_t __ret; \
   9577   __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 2); \
   9578   __ret; \
   9579 })
   9580 #else
   9581 #define vld2_lane_s32(__p0, __p1, __p2) __extension__ ({ \
   9582   int32x2x2_t __s1 = __p1; \
   9583   int32x2x2_t __rev1; \
   9584   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \
   9585   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \
   9586   int32x2x2_t __ret; \
   9587   __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 2); \
   9588  \
   9589   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   9590   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   9591   __ret; \
   9592 })
   9593 #endif
   9594 
   9595 #ifdef __LITTLE_ENDIAN__
   9596 #define vld2_lane_s16(__p0, __p1, __p2) __extension__ ({ \
   9597   int16x4x2_t __s1 = __p1; \
   9598   int16x4x2_t __ret; \
   9599   __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 1); \
   9600   __ret; \
   9601 })
   9602 #else
   9603 #define vld2_lane_s16(__p0, __p1, __p2) __extension__ ({ \
   9604   int16x4x2_t __s1 = __p1; \
   9605   int16x4x2_t __rev1; \
   9606   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
   9607   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
   9608   int16x4x2_t __ret; \
   9609   __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 1); \
   9610  \
   9611   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9612   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9613   __ret; \
   9614 })
   9615 #endif
   9616 
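/* vld3: the three-vector analogue of vld2 above; loads 3*N interleaved
 * elements from __p0 and de-interleaves them into val[0], val[1] and val[2]
 * of a <type>x<N>x3_t structure.
 *
 * Illustrative use (buffer name is a placeholder):
 *
 *   uint8x16x3_t rgb = vld3q_u8(pixels);  // for packed RGB data:
 *                                         // val[0]=R, val[1]=G, val[2]=B
 */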
   9617 #ifdef __LITTLE_ENDIAN__
   9618 #define vld3_p8(__p0) __extension__ ({ \
   9619   poly8x8x3_t __ret; \
   9620   __builtin_neon_vld3_v(&__ret, __p0, 4); \
   9621   __ret; \
   9622 })
   9623 #else
   9624 #define vld3_p8(__p0) __extension__ ({ \
   9625   poly8x8x3_t __ret; \
   9626   __builtin_neon_vld3_v(&__ret, __p0, 4); \
   9627  \
   9628   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9629   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9630   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   9631   __ret; \
   9632 })
   9633 #endif
   9634 
   9635 #ifdef __LITTLE_ENDIAN__
   9636 #define vld3_p16(__p0) __extension__ ({ \
   9637   poly16x4x3_t __ret; \
   9638   __builtin_neon_vld3_v(&__ret, __p0, 5); \
   9639   __ret; \
   9640 })
   9641 #else
   9642 #define vld3_p16(__p0) __extension__ ({ \
   9643   poly16x4x3_t __ret; \
   9644   __builtin_neon_vld3_v(&__ret, __p0, 5); \
   9645  \
   9646   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9647   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9648   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   9649   __ret; \
   9650 })
   9651 #endif
   9652 
   9653 #ifdef __LITTLE_ENDIAN__
   9654 #define vld3q_p8(__p0) __extension__ ({ \
   9655   poly8x16x3_t __ret; \
   9656   __builtin_neon_vld3q_v(&__ret, __p0, 36); \
   9657   __ret; \
   9658 })
   9659 #else
   9660 #define vld3q_p8(__p0) __extension__ ({ \
   9661   poly8x16x3_t __ret; \
   9662   __builtin_neon_vld3q_v(&__ret, __p0, 36); \
   9663  \
   9664   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   9665   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   9666   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   9667   __ret; \
   9668 })
   9669 #endif
   9670 
   9671 #ifdef __LITTLE_ENDIAN__
   9672 #define vld3q_p16(__p0) __extension__ ({ \
   9673   poly16x8x3_t __ret; \
   9674   __builtin_neon_vld3q_v(&__ret, __p0, 37); \
   9675   __ret; \
   9676 })
   9677 #else
   9678 #define vld3q_p16(__p0) __extension__ ({ \
   9679   poly16x8x3_t __ret; \
   9680   __builtin_neon_vld3q_v(&__ret, __p0, 37); \
   9681  \
   9682   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9683   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9684   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   9685   __ret; \
   9686 })
   9687 #endif
   9688 
   9689 #ifdef __LITTLE_ENDIAN__
   9690 #define vld3q_u8(__p0) __extension__ ({ \
   9691   uint8x16x3_t __ret; \
   9692   __builtin_neon_vld3q_v(&__ret, __p0, 48); \
   9693   __ret; \
   9694 })
   9695 #else
   9696 #define vld3q_u8(__p0) __extension__ ({ \
   9697   uint8x16x3_t __ret; \
   9698   __builtin_neon_vld3q_v(&__ret, __p0, 48); \
   9699  \
   9700   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   9701   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   9702   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   9703   __ret; \
   9704 })
   9705 #endif
   9706 
   9707 #ifdef __LITTLE_ENDIAN__
   9708 #define vld3q_u32(__p0) __extension__ ({ \
   9709   uint32x4x3_t __ret; \
   9710   __builtin_neon_vld3q_v(&__ret, __p0, 50); \
   9711   __ret; \
   9712 })
   9713 #else
   9714 #define vld3q_u32(__p0) __extension__ ({ \
   9715   uint32x4x3_t __ret; \
   9716   __builtin_neon_vld3q_v(&__ret, __p0, 50); \
   9717  \
   9718   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9719   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9720   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   9721   __ret; \
   9722 })
   9723 #endif
   9724 
   9725 #ifdef __LITTLE_ENDIAN__
   9726 #define vld3q_u16(__p0) __extension__ ({ \
   9727   uint16x8x3_t __ret; \
   9728   __builtin_neon_vld3q_v(&__ret, __p0, 49); \
   9729   __ret; \
   9730 })
   9731 #else
   9732 #define vld3q_u16(__p0) __extension__ ({ \
   9733   uint16x8x3_t __ret; \
   9734   __builtin_neon_vld3q_v(&__ret, __p0, 49); \
   9735  \
   9736   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9737   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9738   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   9739   __ret; \
   9740 })
   9741 #endif
   9742 
   9743 #ifdef __LITTLE_ENDIAN__
   9744 #define vld3q_s8(__p0) __extension__ ({ \
   9745   int8x16x3_t __ret; \
   9746   __builtin_neon_vld3q_v(&__ret, __p0, 32); \
   9747   __ret; \
   9748 })
   9749 #else
   9750 #define vld3q_s8(__p0) __extension__ ({ \
   9751   int8x16x3_t __ret; \
   9752   __builtin_neon_vld3q_v(&__ret, __p0, 32); \
   9753  \
   9754   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   9755   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   9756   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   9757   __ret; \
   9758 })
   9759 #endif
   9760 
   9761 #ifdef __LITTLE_ENDIAN__
   9762 #define vld3q_f32(__p0) __extension__ ({ \
   9763   float32x4x3_t __ret; \
   9764   __builtin_neon_vld3q_v(&__ret, __p0, 41); \
   9765   __ret; \
   9766 })
   9767 #else
   9768 #define vld3q_f32(__p0) __extension__ ({ \
   9769   float32x4x3_t __ret; \
   9770   __builtin_neon_vld3q_v(&__ret, __p0, 41); \
   9771  \
   9772   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9773   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9774   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   9775   __ret; \
   9776 })
   9777 #endif
   9778 
   9779 #ifdef __LITTLE_ENDIAN__
   9780 #define vld3q_f16(__p0) __extension__ ({ \
   9781   float16x8x3_t __ret; \
   9782   __builtin_neon_vld3q_v(&__ret, __p0, 40); \
   9783   __ret; \
   9784 })
   9785 #else
   9786 #define vld3q_f16(__p0) __extension__ ({ \
   9787   float16x8x3_t __ret; \
   9788   __builtin_neon_vld3q_v(&__ret, __p0, 40); \
   9789  \
   9790   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9791   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9792   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   9793   __ret; \
   9794 })
   9795 #endif
   9796 
   9797 #ifdef __LITTLE_ENDIAN__
   9798 #define vld3q_s32(__p0) __extension__ ({ \
   9799   int32x4x3_t __ret; \
   9800   __builtin_neon_vld3q_v(&__ret, __p0, 34); \
   9801   __ret; \
   9802 })
   9803 #else
   9804 #define vld3q_s32(__p0) __extension__ ({ \
   9805   int32x4x3_t __ret; \
   9806   __builtin_neon_vld3q_v(&__ret, __p0, 34); \
   9807  \
   9808   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9809   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9810   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   9811   __ret; \
   9812 })
   9813 #endif
   9814 
   9815 #ifdef __LITTLE_ENDIAN__
   9816 #define vld3q_s16(__p0) __extension__ ({ \
   9817   int16x8x3_t __ret; \
   9818   __builtin_neon_vld3q_v(&__ret, __p0, 33); \
   9819   __ret; \
   9820 })
   9821 #else
   9822 #define vld3q_s16(__p0) __extension__ ({ \
   9823   int16x8x3_t __ret; \
   9824   __builtin_neon_vld3q_v(&__ret, __p0, 33); \
   9825  \
   9826   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9827   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9828   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   9829   __ret; \
   9830 })
   9831 #endif
   9832 
   9833 #ifdef __LITTLE_ENDIAN__
   9834 #define vld3_u8(__p0) __extension__ ({ \
   9835   uint8x8x3_t __ret; \
   9836   __builtin_neon_vld3_v(&__ret, __p0, 16); \
   9837   __ret; \
   9838 })
   9839 #else
   9840 #define vld3_u8(__p0) __extension__ ({ \
   9841   uint8x8x3_t __ret; \
   9842   __builtin_neon_vld3_v(&__ret, __p0, 16); \
   9843  \
   9844   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9845   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9846   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   9847   __ret; \
   9848 })
   9849 #endif
   9850 
   9851 #ifdef __LITTLE_ENDIAN__
   9852 #define vld3_u32(__p0) __extension__ ({ \
   9853   uint32x2x3_t __ret; \
   9854   __builtin_neon_vld3_v(&__ret, __p0, 18); \
   9855   __ret; \
   9856 })
   9857 #else
   9858 #define vld3_u32(__p0) __extension__ ({ \
   9859   uint32x2x3_t __ret; \
   9860   __builtin_neon_vld3_v(&__ret, __p0, 18); \
   9861  \
   9862   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   9863   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   9864   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \
   9865   __ret; \
   9866 })
   9867 #endif
   9868 
   9869 #ifdef __LITTLE_ENDIAN__
   9870 #define vld3_u64(__p0) __extension__ ({ \
   9871   uint64x1x3_t __ret; \
   9872   __builtin_neon_vld3_v(&__ret, __p0, 19); \
   9873   __ret; \
   9874 })
   9875 #else
   9876 #define vld3_u64(__p0) __extension__ ({ \
   9877   uint64x1x3_t __ret; \
   9878   __builtin_neon_vld3_v(&__ret, __p0, 19); \
   9879   __ret; \
   9880 })
   9881 #endif
   9882 
   9883 #ifdef __LITTLE_ENDIAN__
   9884 #define vld3_u16(__p0) __extension__ ({ \
   9885   uint16x4x3_t __ret; \
   9886   __builtin_neon_vld3_v(&__ret, __p0, 17); \
   9887   __ret; \
   9888 })
   9889 #else
   9890 #define vld3_u16(__p0) __extension__ ({ \
   9891   uint16x4x3_t __ret; \
   9892   __builtin_neon_vld3_v(&__ret, __p0, 17); \
   9893  \
   9894   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9895   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9896   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   9897   __ret; \
   9898 })
   9899 #endif
   9900 
   9901 #ifdef __LITTLE_ENDIAN__
   9902 #define vld3_s8(__p0) __extension__ ({ \
   9903   int8x8x3_t __ret; \
   9904   __builtin_neon_vld3_v(&__ret, __p0, 0); \
   9905   __ret; \
   9906 })
   9907 #else
   9908 #define vld3_s8(__p0) __extension__ ({ \
   9909   int8x8x3_t __ret; \
   9910   __builtin_neon_vld3_v(&__ret, __p0, 0); \
   9911  \
   9912   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   9913   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   9914   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   9915   __ret; \
   9916 })
   9917 #endif
   9918 
   9919 #ifdef __LITTLE_ENDIAN__
   9920 #define vld3_f32(__p0) __extension__ ({ \
   9921   float32x2x3_t __ret; \
   9922   __builtin_neon_vld3_v(&__ret, __p0, 9); \
   9923   __ret; \
   9924 })
   9925 #else
   9926 #define vld3_f32(__p0) __extension__ ({ \
   9927   float32x2x3_t __ret; \
   9928   __builtin_neon_vld3_v(&__ret, __p0, 9); \
   9929  \
   9930   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   9931   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   9932   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \
   9933   __ret; \
   9934 })
   9935 #endif
   9936 
   9937 #ifdef __LITTLE_ENDIAN__
   9938 #define vld3_f16(__p0) __extension__ ({ \
   9939   float16x4x3_t __ret; \
   9940   __builtin_neon_vld3_v(&__ret, __p0, 8); \
   9941   __ret; \
   9942 })
   9943 #else
   9944 #define vld3_f16(__p0) __extension__ ({ \
   9945   float16x4x3_t __ret; \
   9946   __builtin_neon_vld3_v(&__ret, __p0, 8); \
   9947  \
   9948   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9949   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   9950   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   9951   __ret; \
   9952 })
   9953 #endif
   9954 
   9955 #ifdef __LITTLE_ENDIAN__
   9956 #define vld3_s32(__p0) __extension__ ({ \
   9957   int32x2x3_t __ret; \
   9958   __builtin_neon_vld3_v(&__ret, __p0, 2); \
   9959   __ret; \
   9960 })
   9961 #else
   9962 #define vld3_s32(__p0) __extension__ ({ \
   9963   int32x2x3_t __ret; \
   9964   __builtin_neon_vld3_v(&__ret, __p0, 2); \
   9965  \
   9966   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   9967   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   9968   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \
   9969   __ret; \
   9970 })
   9971 #endif
   9972 
   9973 #ifdef __LITTLE_ENDIAN__
   9974 #define vld3_s64(__p0) __extension__ ({ \
   9975   int64x1x3_t __ret; \
   9976   __builtin_neon_vld3_v(&__ret, __p0, 3); \
   9977   __ret; \
   9978 })
   9979 #else
   9980 #define vld3_s64(__p0) __extension__ ({ \
   9981   int64x1x3_t __ret; \
   9982   __builtin_neon_vld3_v(&__ret, __p0, 3); \
   9983   __ret; \
   9984 })
   9985 #endif
   9986 
   9987 #ifdef __LITTLE_ENDIAN__
   9988 #define vld3_s16(__p0) __extension__ ({ \
   9989   int16x4x3_t __ret; \
   9990   __builtin_neon_vld3_v(&__ret, __p0, 1); \
   9991   __ret; \
   9992 })
   9993 #else
   9994 #define vld3_s16(__p0) __extension__ ({ \
   9995   int16x4x3_t __ret; \
   9996   __builtin_neon_vld3_v(&__ret, __p0, 1); \
   9997  \
   9998   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   9999   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   10000   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   10001   __ret; \
   10002 })
   10003 #endif
   10004 
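/* vld3_dup: load three consecutive elements from __p0 and broadcast each
 * across all lanes of val[0], val[1] and val[2] respectively, mirroring
 * vld2_dup above.
 */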
   10005 #ifdef __LITTLE_ENDIAN__
   10006 #define vld3_dup_p8(__p0) __extension__ ({ \
   10007   poly8x8x3_t __ret; \
   10008   __builtin_neon_vld3_dup_v(&__ret, __p0, 4); \
   10009   __ret; \
   10010 })
   10011 #else
   10012 #define vld3_dup_p8(__p0) __extension__ ({ \
   10013   poly8x8x3_t __ret; \
   10014   __builtin_neon_vld3_dup_v(&__ret, __p0, 4); \
   10015  \
   10016   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   10017   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   10018   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   10019   __ret; \
   10020 })
   10021 #endif
   10022 
   10023 #ifdef __LITTLE_ENDIAN__
   10024 #define vld3_dup_p16(__p0) __extension__ ({ \
   10025   poly16x4x3_t __ret; \
   10026   __builtin_neon_vld3_dup_v(&__ret, __p0, 5); \
   10027   __ret; \
   10028 })
   10029 #else
   10030 #define vld3_dup_p16(__p0) __extension__ ({ \
   10031   poly16x4x3_t __ret; \
   10032   __builtin_neon_vld3_dup_v(&__ret, __p0, 5); \
   10033  \
   10034   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   10035   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   10036   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   10037   __ret; \
   10038 })
   10039 #endif
   10040 
   10041 #ifdef __LITTLE_ENDIAN__
   10042 #define vld3_dup_u8(__p0) __extension__ ({ \
   10043   uint8x8x3_t __ret; \
   10044   __builtin_neon_vld3_dup_v(&__ret, __p0, 16); \
   10045   __ret; \
   10046 })
   10047 #else
   10048 #define vld3_dup_u8(__p0) __extension__ ({ \
   10049   uint8x8x3_t __ret; \
   10050   __builtin_neon_vld3_dup_v(&__ret, __p0, 16); \
   10051  \
   10052   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   10053   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   10054   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   10055   __ret; \
   10056 })
   10057 #endif
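/* Illustrative sketch, not part of the generated header: vld3_dup_u8 reads
 * three consecutive bytes and broadcasts each one across all eight lanes of
 * its result vector: val[0] = {p[0] x 8}, val[1] = {p[1] x 8},
 * val[2] = {p[2] x 8}.  The helper name below is hypothetical. */
static __inline__ uint8x8x3_t __example_splat_rgb_u8(const uint8_t *__p) {
  return vld3_dup_u8(__p); /* e.g. splat one packed R,G,B pixel into three vectors */
}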
   10058 
   10059 #ifdef __LITTLE_ENDIAN__
   10060 #define vld3_dup_u32(__p0) __extension__ ({ \
   10061   uint32x2x3_t __ret; \
   10062   __builtin_neon_vld3_dup_v(&__ret, __p0, 18); \
   10063   __ret; \
   10064 })
   10065 #else
   10066 #define vld3_dup_u32(__p0) __extension__ ({ \
   10067   uint32x2x3_t __ret; \
   10068   __builtin_neon_vld3_dup_v(&__ret, __p0, 18); \
   10069  \
   10070   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   10071   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   10072   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \
   10073   __ret; \
   10074 })
   10075 #endif
   10076 
   10077 #ifdef __LITTLE_ENDIAN__
   10078 #define vld3_dup_u64(__p0) __extension__ ({ \
   10079   uint64x1x3_t __ret; \
   10080   __builtin_neon_vld3_dup_v(&__ret, __p0, 19); \
   10081   __ret; \
   10082 })
   10083 #else
   10084 #define vld3_dup_u64(__p0) __extension__ ({ \
   10085   uint64x1x3_t __ret; \
   10086   __builtin_neon_vld3_dup_v(&__ret, __p0, 19); \
   10087   __ret; \
   10088 })
   10089 #endif
   10090 
   10091 #ifdef __LITTLE_ENDIAN__
   10092 #define vld3_dup_u16(__p0) __extension__ ({ \
   10093   uint16x4x3_t __ret; \
   10094   __builtin_neon_vld3_dup_v(&__ret, __p0, 17); \
   10095   __ret; \
   10096 })
   10097 #else
   10098 #define vld3_dup_u16(__p0) __extension__ ({ \
   10099   uint16x4x3_t __ret; \
   10100   __builtin_neon_vld3_dup_v(&__ret, __p0, 17); \
   10101  \
   10102   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   10103   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   10104   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   10105   __ret; \
   10106 })
   10107 #endif
   10108 
   10109 #ifdef __LITTLE_ENDIAN__
   10110 #define vld3_dup_s8(__p0) __extension__ ({ \
   10111   int8x8x3_t __ret; \
   10112   __builtin_neon_vld3_dup_v(&__ret, __p0, 0); \
   10113   __ret; \
   10114 })
   10115 #else
   10116 #define vld3_dup_s8(__p0) __extension__ ({ \
   10117   int8x8x3_t __ret; \
   10118   __builtin_neon_vld3_dup_v(&__ret, __p0, 0); \
   10119  \
   10120   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   10121   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   10122   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   10123   __ret; \
   10124 })
   10125 #endif
   10126 
   10127 #ifdef __LITTLE_ENDIAN__
   10128 #define vld3_dup_f32(__p0) __extension__ ({ \
   10129   float32x2x3_t __ret; \
   10130   __builtin_neon_vld3_dup_v(&__ret, __p0, 9); \
   10131   __ret; \
   10132 })
   10133 #else
   10134 #define vld3_dup_f32(__p0) __extension__ ({ \
   10135   float32x2x3_t __ret; \
   10136   __builtin_neon_vld3_dup_v(&__ret, __p0, 9); \
   10137  \
   10138   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   10139   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   10140   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \
   10141   __ret; \
   10142 })
   10143 #endif
   10144 
   10145 #ifdef __LITTLE_ENDIAN__
   10146 #define vld3_dup_f16(__p0) __extension__ ({ \
   10147   float16x4x3_t __ret; \
   10148   __builtin_neon_vld3_dup_v(&__ret, __p0, 8); \
   10149   __ret; \
   10150 })
   10151 #else
   10152 #define vld3_dup_f16(__p0) __extension__ ({ \
   10153   float16x4x3_t __ret; \
   10154   __builtin_neon_vld3_dup_v(&__ret, __p0, 8); \
   10155  \
   10156   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   10157   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   10158   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   10159   __ret; \
   10160 })
   10161 #endif
   10162 
   10163 #ifdef __LITTLE_ENDIAN__
   10164 #define vld3_dup_s32(__p0) __extension__ ({ \
   10165   int32x2x3_t __ret; \
   10166   __builtin_neon_vld3_dup_v(&__ret, __p0, 2); \
   10167   __ret; \
   10168 })
   10169 #else
   10170 #define vld3_dup_s32(__p0) __extension__ ({ \
   10171   int32x2x3_t __ret; \
   10172   __builtin_neon_vld3_dup_v(&__ret, __p0, 2); \
   10173  \
   10174   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \
   10175   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \
   10176   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \
   10177   __ret; \
   10178 })
   10179 #endif
   10180 
   10181 #ifdef __LITTLE_ENDIAN__
   10182 #define vld3_dup_s64(__p0) __extension__ ({ \
   10183   int64x1x3_t __ret; \
   10184   __builtin_neon_vld3_dup_v(&__ret, __p0, 3); \
   10185   __ret; \
   10186 })
   10187 #else
   10188 #define vld3_dup_s64(__p0) __extension__ ({ \
   10189   int64x1x3_t __ret; \
   10190   __builtin_neon_vld3_dup_v(&__ret, __p0, 3); \
   10191   __ret; \
   10192 })
   10193 #endif
   10194 
   10195 #ifdef __LITTLE_ENDIAN__
   10196 #define vld3_dup_s16(__p0) __extension__ ({ \
   10197   int16x4x3_t __ret; \
   10198   __builtin_neon_vld3_dup_v(&__ret, __p0, 1); \
   10199   __ret; \
   10200 })
   10201 #else
   10202 #define vld3_dup_s16(__p0) __extension__ ({ \
   10203   int16x4x3_t __ret; \
   10204   __builtin_neon_vld3_dup_v(&__ret, __p0, 1); \
   10205  \
   10206   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   10207   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   10208   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   10209   __ret; \
   10210 })
   10211 #endif
   10212 
   10213 #ifdef __LITTLE_ENDIAN__
   10214 #define vld3_lane_p8(__p0, __p1, __p2) __extension__ ({ \
   10215   poly8x8x3_t __s1 = __p1; \
   10216   poly8x8x3_t __ret; \
   10217   __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 4); \
   10218   __ret; \
   10219 })
   10220 #else
   10221 #define vld3_lane_p8(__p0, __p1, __p2) __extension__ ({ \
   10222   poly8x8x3_t __s1 = __p1; \
   10223   poly8x8x3_t __rev1; \
   10224   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   10225   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   10226   __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   10227   poly8x8x3_t __ret; \
   10228   __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 4); \
   10229  \
   10230   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   10231   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   10232   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   10233   __ret; \
   10234 })
   10235 #endif
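/* Illustrative sketch, not part of the generated header: vld3_lane_p8 reads
 * three consecutive poly8 values and inserts them into lane __p2 of the three
 * vectors supplied in __p1, leaving every other lane unchanged.  The lane
 * index must be a constant expression in [0, 7].  The helper name and the
 * chosen lane are hypothetical. */
static __inline__ poly8x8x3_t __example_load_lane0_p8(const poly8_t *__p, poly8x8x3_t __acc) {
  return vld3_lane_p8(__p, __acc, 0); /* overwrite lane 0 of all three vectors */
}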
   10236 
   10237 #ifdef __LITTLE_ENDIAN__
   10238 #define vld3_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   10239   poly16x4x3_t __s1 = __p1; \
   10240   poly16x4x3_t __ret; \
   10241   __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 5); \
   10242   __ret; \
   10243 })
   10244 #else
   10245 #define vld3_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   10246   poly16x4x3_t __s1 = __p1; \
   10247   poly16x4x3_t __rev1; \
   10248   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \
   10249   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \
   10250   __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \
   10251   poly16x4x3_t __ret; \
   10252   __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 5); \
   10253  \
   10254   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \
   10255   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \
   10256   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \
   10257   __ret; \
   10258 })
   10259 #endif
   10260 
   10261 #ifdef __LITTLE_ENDIAN__
   10262 #define vld3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   10263   poly16x8x3_t __s1 = __p1; \
   10264   poly16x8x3_t __ret; \
   10265   __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 37); \
   10266   __ret; \
   10267 })
   10268 #else
   10269 #define vld3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \
   10270   poly16x8x3_t __s1 = __p1; \
   10271   poly16x8x3_t __rev1; \
   10272   __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   10273   __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   10274   __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   10275   poly16x8x3_t __ret; \
   10276   __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 37); \
   10277  \
   10278   __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \
   10279   __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \
   10280   __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \
   10281   __ret; \
   10282 })
   10283 #endif
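/* Illustrative sketch, not part of the generated header: the q-suffixed lane
 * loads behave the same way on 128-bit vectors, so vld3q_lane_p16 replaces
 * lane __p2 (a constant in [0, 7]) of each poly16x8_t in __p1 with three
 * poly16 values read from __p0.  The helper name is hypothetical. */
static __inline__ poly16x8x3_t __example_load_last_lane_p16q(const poly16_t *__p, poly16x8x3_t __acc) {
  return vld3q_lane_p16(__p, __acc, 7); /* overwrite the last lane of each vector */
}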
   10284 
   10285 #ifdef __LITTLE_ENDIAN__
   10286 #define vld3q_lane_u32(__p0, __p1, __p2) __extension__ ({ \
   10287   uint32x4x3_t __s1 = __p1; \
   10288   uint32x4x3_t __ret; \
   10289   __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 50); \
   10290   __ret; \
   10291 })
   10292 #else