Home | History | Annotate | Download | only in include
      1 /* ARM NEON intrinsics include file.
      2 
      3    Copyright (C) 2011-2014 Free Software Foundation, Inc.
      4    Contributed by ARM Ltd.
      5 
      6    This file is part of GCC.
      7 
      8    GCC is free software; you can redistribute it and/or modify it
      9    under the terms of the GNU General Public License as published
     10    by the Free Software Foundation; either version 3, or (at your
     11    option) any later version.
     12 
     13    GCC is distributed in the hope that it will be useful, but WITHOUT
     14    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     16    License for more details.
     17 
     18    Under Section 7 of GPL version 3, you are granted additional
     19    permissions described in the GCC Runtime Library Exception, version
     20    3.1, as published by the Free Software Foundation.
     21 
     22    You should have received a copy of the GNU General Public License and
     23    a copy of the GCC Runtime Library Exception along with this program;
     24    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     25    <http://www.gnu.org/licenses/>.  */
     26 
     27 #ifndef _AARCH64_NEON_H_
     28 #define _AARCH64_NEON_H_
     29 
     30 #include <stdint.h>
     31 
/* Force a 64-bit integer literal to the exact scalar type used by the
   64-bit-element intrinsics.  */
#define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
#define __AARCH64_INT64_C(__C) ((int64_t) __C)

/* 64-bit (D-register) vector types.  The element types
   (__builtin_aarch64_simd_*) are internal GCC type names; the
   __vector_size__ attribute turns them into GCC generic vectors.  */
typedef __builtin_aarch64_simd_qi int8x8_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_hi int16x4_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_si int32x2_t
  __attribute__ ((__vector_size__ (8)));
/* Single-element 64-bit "vectors" are represented as plain scalars.  */
typedef int64_t int64x1_t;
typedef double float64x1_t;
typedef __builtin_aarch64_simd_sf float32x2_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_poly8 poly8x8_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_poly16 poly16x4_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_uqi uint8x8_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_uhi uint16x4_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_usi uint32x2_t
  __attribute__ ((__vector_size__ (8)));
typedef uint64_t uint64x1_t;
/* 128-bit (Q-register) vector types.  */
typedef __builtin_aarch64_simd_qi int8x16_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_hi int16x8_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_si int32x4_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_di int64x2_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_sf float32x4_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_df float64x2_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_poly8 poly8x16_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_poly16 poly16x8_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_poly64 poly64x2_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_uqi uint8x16_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_uhi uint16x8_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_usi uint32x4_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_udi uint64x2_t
  __attribute__ ((__vector_size__ (16)));

/* Scalar element types matching the vector element types above.  */
typedef float float32_t;
typedef double float64_t;
typedef __builtin_aarch64_simd_poly8 poly8_t;
typedef __builtin_aarch64_simd_poly16 poly16_t;
typedef __builtin_aarch64_simd_poly64 poly64_t;
typedef __builtin_aarch64_simd_poly128 poly128_t;
     89 
/* Aggregates of 2, 3 and 4 vectors of each type.  These are the result
   and argument types of the de-interleaving multi-vector load/store
   intrinsics (vld2/vld3/vld4, vst2/vst3/vst4 and friends).  Layout is
   ABI-significant: a plain array of the component vectors.  */

/* Pairs of vectors.  */
typedef struct int8x8x2_t
{
  int8x8_t val[2];
} int8x8x2_t;

typedef struct int8x16x2_t
{
  int8x16_t val[2];
} int8x16x2_t;

typedef struct int16x4x2_t
{
  int16x4_t val[2];
} int16x4x2_t;

typedef struct int16x8x2_t
{
  int16x8_t val[2];
} int16x8x2_t;

typedef struct int32x2x2_t
{
  int32x2_t val[2];
} int32x2x2_t;

typedef struct int32x4x2_t
{
  int32x4_t val[2];
} int32x4x2_t;

typedef struct int64x1x2_t
{
  int64x1_t val[2];
} int64x1x2_t;

typedef struct int64x2x2_t
{
  int64x2_t val[2];
} int64x2x2_t;

typedef struct uint8x8x2_t
{
  uint8x8_t val[2];
} uint8x8x2_t;

typedef struct uint8x16x2_t
{
  uint8x16_t val[2];
} uint8x16x2_t;

typedef struct uint16x4x2_t
{
  uint16x4_t val[2];
} uint16x4x2_t;

typedef struct uint16x8x2_t
{
  uint16x8_t val[2];
} uint16x8x2_t;

typedef struct uint32x2x2_t
{
  uint32x2_t val[2];
} uint32x2x2_t;

typedef struct uint32x4x2_t
{
  uint32x4_t val[2];
} uint32x4x2_t;

typedef struct uint64x1x2_t
{
  uint64x1_t val[2];
} uint64x1x2_t;

typedef struct uint64x2x2_t
{
  uint64x2_t val[2];
} uint64x2x2_t;

typedef struct float32x2x2_t
{
  float32x2_t val[2];
} float32x2x2_t;

typedef struct float32x4x2_t
{
  float32x4_t val[2];
} float32x4x2_t;

typedef struct float64x2x2_t
{
  float64x2_t val[2];
} float64x2x2_t;

typedef struct float64x1x2_t
{
  float64x1_t val[2];
} float64x1x2_t;

typedef struct poly8x8x2_t
{
  poly8x8_t val[2];
} poly8x8x2_t;

typedef struct poly8x16x2_t
{
  poly8x16_t val[2];
} poly8x16x2_t;

typedef struct poly16x4x2_t
{
  poly16x4_t val[2];
} poly16x4x2_t;

typedef struct poly16x8x2_t
{
  poly16x8_t val[2];
} poly16x8x2_t;

/* Triples of vectors.  */
typedef struct int8x8x3_t
{
  int8x8_t val[3];
} int8x8x3_t;

typedef struct int8x16x3_t
{
  int8x16_t val[3];
} int8x16x3_t;

typedef struct int16x4x3_t
{
  int16x4_t val[3];
} int16x4x3_t;

typedef struct int16x8x3_t
{
  int16x8_t val[3];
} int16x8x3_t;

typedef struct int32x2x3_t
{
  int32x2_t val[3];
} int32x2x3_t;

typedef struct int32x4x3_t
{
  int32x4_t val[3];
} int32x4x3_t;

typedef struct int64x1x3_t
{
  int64x1_t val[3];
} int64x1x3_t;

typedef struct int64x2x3_t
{
  int64x2_t val[3];
} int64x2x3_t;

typedef struct uint8x8x3_t
{
  uint8x8_t val[3];
} uint8x8x3_t;

typedef struct uint8x16x3_t
{
  uint8x16_t val[3];
} uint8x16x3_t;

typedef struct uint16x4x3_t
{
  uint16x4_t val[3];
} uint16x4x3_t;

typedef struct uint16x8x3_t
{
  uint16x8_t val[3];
} uint16x8x3_t;

typedef struct uint32x2x3_t
{
  uint32x2_t val[3];
} uint32x2x3_t;

typedef struct uint32x4x3_t
{
  uint32x4_t val[3];
} uint32x4x3_t;

typedef struct uint64x1x3_t
{
  uint64x1_t val[3];
} uint64x1x3_t;

typedef struct uint64x2x3_t
{
  uint64x2_t val[3];
} uint64x2x3_t;

typedef struct float32x2x3_t
{
  float32x2_t val[3];
} float32x2x3_t;

typedef struct float32x4x3_t
{
  float32x4_t val[3];
} float32x4x3_t;

typedef struct float64x2x3_t
{
  float64x2_t val[3];
} float64x2x3_t;

typedef struct float64x1x3_t
{
  float64x1_t val[3];
} float64x1x3_t;

typedef struct poly8x8x3_t
{
  poly8x8_t val[3];
} poly8x8x3_t;

typedef struct poly8x16x3_t
{
  poly8x16_t val[3];
} poly8x16x3_t;

typedef struct poly16x4x3_t
{
  poly16x4_t val[3];
} poly16x4x3_t;

typedef struct poly16x8x3_t
{
  poly16x8_t val[3];
} poly16x8x3_t;

/* Quadruples of vectors.  */
typedef struct int8x8x4_t
{
  int8x8_t val[4];
} int8x8x4_t;

typedef struct int8x16x4_t
{
  int8x16_t val[4];
} int8x16x4_t;

typedef struct int16x4x4_t
{
  int16x4_t val[4];
} int16x4x4_t;

typedef struct int16x8x4_t
{
  int16x8_t val[4];
} int16x8x4_t;

typedef struct int32x2x4_t
{
  int32x2_t val[4];
} int32x2x4_t;

typedef struct int32x4x4_t
{
  int32x4_t val[4];
} int32x4x4_t;

typedef struct int64x1x4_t
{
  int64x1_t val[4];
} int64x1x4_t;

typedef struct int64x2x4_t
{
  int64x2_t val[4];
} int64x2x4_t;

typedef struct uint8x8x4_t
{
  uint8x8_t val[4];
} uint8x8x4_t;

typedef struct uint8x16x4_t
{
  uint8x16_t val[4];
} uint8x16x4_t;

typedef struct uint16x4x4_t
{
  uint16x4_t val[4];
} uint16x4x4_t;

typedef struct uint16x8x4_t
{
  uint16x8_t val[4];
} uint16x8x4_t;

typedef struct uint32x2x4_t
{
  uint32x2_t val[4];
} uint32x2x4_t;

typedef struct uint32x4x4_t
{
  uint32x4_t val[4];
} uint32x4x4_t;

typedef struct uint64x1x4_t
{
  uint64x1_t val[4];
} uint64x1x4_t;

typedef struct uint64x2x4_t
{
  uint64x2_t val[4];
} uint64x2x4_t;

typedef struct float32x2x4_t
{
  float32x2_t val[4];
} float32x2x4_t;

typedef struct float32x4x4_t
{
  float32x4_t val[4];
} float32x4x4_t;

typedef struct float64x2x4_t
{
  float64x2_t val[4];
} float64x2x4_t;

typedef struct float64x1x4_t
{
  float64x1_t val[4];
} float64x1x4_t;

typedef struct poly8x8x4_t
{
  poly8x8_t val[4];
} poly8x8x4_t;

typedef struct poly8x16x4_t
{
  poly8x16_t val[4];
} poly8x16x4_t;

typedef struct poly16x4x4_t
{
  poly16x4_t val[4];
} poly16x4x4_t;

typedef struct poly16x8x4_t
{
  poly16x8_t val[4];
} poly16x8x4_t;
    449 
/* vget_lane internal macros.  */

/* Common expansion for all lane extraction macros.
   __size     - the machine-mode suffix pasted onto the builtin name
		(v8qi, v4hi, ... selects the vector mode).
   __cast_ret - cast applied to the extracted element, or empty.
   __cast_a   - cast applied to the vector operand, or empty (the
		builtins are declared on signed vector types, so
		unsigned/poly vectors are reinterpreted through them).
   __a        - the vector; __b - the lane index.
   __builtin_aarch64_be_checked_get_lane<mode> validates the lane index
   and, per its name, accounts for big-endian lane ordering — the
   compiler internals define the exact semantics.  */
#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret								\
     __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b))

#define __aarch64_vget_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v2sf, , , __a, __b)
/* One-element vectors are scalars, so lane extraction is the identity;
   the lane index is ignored.  */
#define __aarch64_vget_lane_f64(__a, __b) (__a)

#define __aarch64_vget_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)

#define __aarch64_vget_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
#define __aarch64_vget_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
#define __aarch64_vget_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v2si, , ,__a, __b)
#define __aarch64_vget_lane_s64(__a, __b) (__a)

#define __aarch64_vget_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
#define __aarch64_vget_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) (__a)

/* Q-register (128-bit) variants.  */
#define __aarch64_vgetq_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v4sf, , , __a, __b)
#define __aarch64_vgetq_lane_f64(__a, __b) \
  __aarch64_vget_lane_any (v2df, , , __a, __b)

#define __aarch64_vgetq_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)

#define __aarch64_vgetq_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
#define __aarch64_vgetq_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
#define __aarch64_vgetq_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v4si, , ,__a, __b)
#define __aarch64_vgetq_lane_s64(__a, __b) \
  __aarch64_vget_lane_any (v2di, , ,__a, __b)

#define __aarch64_vgetq_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
#define __aarch64_vgetq_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
#define __aarch64_vgetq_lane_u64(__a, __b) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)

/* __aarch64_vdup_lane internal macros.  */

/* Duplicate lane __b of vector __a across a new vector: extract the
   lane, then broadcast it with the matching vdup*_n_* intrinsic.
   __q1 - 'q' if the RESULT is a Q-register vector, empty otherwise.
   __q2 - 'q' if the SOURCE is a Q-register vector, empty otherwise.  */
#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
  vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))

#define __aarch64_vdup_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , , __a, __b)
/* Scalar (one-element) cases degenerate to the identity / a plain
   broadcast of the scalar.  */
#define __aarch64_vdup_lane_f64(__a, __b) (__a)
#define __aarch64_vdup_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) (__a)
#define __aarch64_vdup_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) (__a)

/* __aarch64_vdup_laneq internal macros: D-register result from a
   Q-register source.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , q, __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , q, __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , q, __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , q, __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , q, __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , q, __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , q, __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , q, __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , q, __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , q, __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , q, __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , q, __a, __b)

/* __aarch64_vdupq_lane internal macros: Q-register result from a
   D-register source.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, , __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a))
#define __aarch64_vdupq_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, , __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, , __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, , __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, , __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, , __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a))
#define __aarch64_vdupq_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, , __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, , __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, , __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a))

/* __aarch64_vdupq_laneq internal macros: Q-register result from a
   Q-register source.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, q, __a, __b)
    609 
    610 /* vadd  */
    611 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    612 vadd_s8 (int8x8_t __a, int8x8_t __b)
    613 {
    614   return __a + __b;
    615 }
    616 
    617 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    618 vadd_s16 (int16x4_t __a, int16x4_t __b)
    619 {
    620   return __a + __b;
    621 }
    622 
    623 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    624 vadd_s32 (int32x2_t __a, int32x2_t __b)
    625 {
    626   return __a + __b;
    627 }
    628 
    629 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
    630 vadd_f32 (float32x2_t __a, float32x2_t __b)
    631 {
    632   return __a + __b;
    633 }
    634 
    635 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
    636 vadd_f64 (float64x1_t __a, float64x1_t __b)
    637 {
    638   return __a + __b;
    639 }
    640 
    641 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    642 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
    643 {
    644   return __a + __b;
    645 }
    646 
    647 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    648 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
    649 {
    650   return __a + __b;
    651 }
    652 
    653 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    654 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
    655 {
    656   return __a + __b;
    657 }
    658 
    659 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
    660 vadd_s64 (int64x1_t __a, int64x1_t __b)
    661 {
    662   return __a + __b;
    663 }
    664 
    665 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
    666 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
    667 {
    668   return __a + __b;
    669 }
    670 
    671 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
    672 vaddq_s8 (int8x16_t __a, int8x16_t __b)
    673 {
    674   return __a + __b;
    675 }
    676 
    677 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
    678 vaddq_s16 (int16x8_t __a, int16x8_t __b)
    679 {
    680   return __a + __b;
    681 }
    682 
    683 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
    684 vaddq_s32 (int32x4_t __a, int32x4_t __b)
    685 {
    686   return __a + __b;
    687 }
    688 
    689 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
    690 vaddq_s64 (int64x2_t __a, int64x2_t __b)
    691 {
    692   return __a + __b;
    693 }
    694 
    695 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
    696 vaddq_f32 (float32x4_t __a, float32x4_t __b)
    697 {
    698   return __a + __b;
    699 }
    700 
    701 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
    702 vaddq_f64 (float64x2_t __a, float64x2_t __b)
    703 {
    704   return __a + __b;
    705 }
    706 
    707 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
    708 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
    709 {
    710   return __a + __b;
    711 }
    712 
    713 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
    714 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
    715 {
    716   return __a + __b;
    717 }
    718 
    719 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
    720 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
    721 {
    722   return __a + __b;
    723 }
    724 
    725 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
    726 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
    727 {
    728   return __a + __b;
    729 }
    730 
/* vaddl: long (widening) add — sums the low D-register operands into a
   Q-register result with elements of twice the input width.  The
   unsigned variants reinterpret their operands through the signed
   vector types the builtins are declared on; the u* builtin itself
   performs the unsigned widening.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}
    769 
/* vaddl_high: widening add of the HIGH halves of two Q-register
   operands (the *2 builtin suffix selects the upper half), producing a
   Q-register result of double-width elements.  Casts on the unsigned
   variants only reinterpret through the builtins' signed types.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}
    808 
/* vaddw: wide add — adds a narrow D-register operand (widened per
   element) to a wide Q-register accumulator of double-width elements.
   Unsigned variants reinterpret through the builtins' signed types.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}
    847 
/* vaddw_high: wide add using the HIGH half of the narrow Q-register
   operand (*2 builtin suffix), added element-wise into the wide
   accumulator.  Unsigned variants reinterpret through signed types.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}
    886 
/* vhadd: halving add on D-register vectors — per the SHADD/UHADD
   builtins, the element-wise sum is computed without overflow and then
   halved.  Unsigned variants reinterpret through the builtins' signed
   vector types.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}
    925 
/* vhaddq: halving add on Q-register (128-bit) vectors; see vhadd.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  /* Reinterpret through the signed types the builtin is declared on;
     the unsigned builtin performs the unsigned halving add.  */
  return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}
    964 
/* vrhadd_*: rounding halving add on 64-bit vectors.  Per ACLE, each
   lane yields (__a + __b + 1) >> 1, i.e. the halved sum rounded to
   nearest, without intermediate overflow (SRHADD/URHADD).  Unsigned
   forms cast through the signed vector types to match the builtins.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
						    (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
						    (int32x2_t) __b);
}
   1003 
/* vrhaddq_*: rounding halving add on 128-bit (q-form) vectors.  Per
   ACLE, each lane yields (__a + __b + 1) >> 1 without intermediate
   overflow (SRHADD/URHADD).  Unsigned forms cast through the signed
   vector types to match the builtins' declared parameters.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}
   1042 
/* vaddhn_*: add and narrow, returning the high half.  Per ACLE, each
   wide lane computes __a + __b and keeps only the most-significant
   half of the result, producing a vector of half-width lanes (ADDHN).
   Unsigned forms cast through the signed types to match the builtins;
   the operation is bit-pattern identical for both signednesses.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}
   1081 
/* vraddhn_*: rounding add and narrow, returning the high half.  Per
   ACLE, like vaddhn but the sum is rounded (half of the discarded low
   part is added) before the high half is taken (RADDHN).  Unsigned
   forms cast through the signed types to match the builtins.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vraddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vraddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vraddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}
   1120 
/* vaddhn_high_*: add and narrow into the high half of a q-register.
   Per ACLE, the narrowed high halves of __b + __c fill the upper half
   of the result while __a supplies the lower half (ADDHN2).  Unsigned
   forms cast through the signed types to match the builtins.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
						    (int16x8_t) __b,
						    (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
						    (int32x4_t) __b,
						    (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
						    (int64x2_t) __b,
						    (int64x2_t) __c);
}
   1162 
/* vraddhn_high_*: rounding add and narrow into the high half of a
   q-register.  Per ACLE, like vaddhn_high but the sum of __b + __c is
   rounded before narrowing; __a supplies the lower half (RADDHN2).
   Unsigned forms cast through the signed types to match the
   builtins.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
						     (int16x8_t) __b,
						     (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
						     (int32x4_t) __b,
						     (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
						     (int64x2_t) __b,
						     (int64x2_t) __c);
}
   1204 
/* vdiv_* / vdivq_*: lane-wise floating-point division, expressed with
   GCC's vector-extension `/' operator, which divides corresponding
   elements of the two operands.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdiv_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdiv_f64 (float64x1_t __a, float64x1_t __b)
{
  /* float64x1_t is a scalar double here (see the typedefs at the top
     of this file), so this is plain scalar division.  */
  return __a / __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdivq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdivq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a / __b;
}
   1228 
/* vmul_* / vmulq_*: lane-wise multiplication.  Integer and float
   variants use GCC's vector-extension `*' operator, which multiplies
   corresponding elements (integer lanes wrap modulo the lane width).
   The poly8 variants instead perform polynomial (carry-less)
   multiplication over GF(2) via the PMUL builtin, which is declared
   with signed vector types, hence the casts.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmul_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmul_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a * __b;
}

/* Polynomial multiply (PMUL): each 8-bit lane is treated as a
   polynomial over GF(2); lanes are multiplied carry-lessly.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmul_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmulq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a * __b;
}

/* Polynomial multiply, q-form (PMUL on 16 lanes).  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}
   1332 
/* vand_* / vandq_*: lane-wise bitwise AND, via GCC's vector-extension
   `&' operator.  The 64x1 variants operate on scalar 64-bit integers
   (see the typedefs at the top of this file).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vand_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vand_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vand_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vand_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vand_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vand_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vand_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vand_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vandq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vandq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vandq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vandq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vandq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vandq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vandq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vandq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & __b;
}
   1428 
/* vorr_* / vorrq_*: lane-wise bitwise inclusive OR, via GCC's
   vector-extension `|' operator.  The 64x1 variants operate on scalar
   64-bit integers.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorr_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorr_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorr_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorr_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorr_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorr_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorr_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorr_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vorrq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vorrq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vorrq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vorrq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | __b;
}
   1524 
/* veor_* / veorq_*: lane-wise bitwise exclusive OR, via GCC's
   vector-extension `^' operator.  The 64x1 variants operate on scalar
   64-bit integers.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
veor_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
veor_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
veor_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
veor_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
veor_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
veor_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
veor_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
veor_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
veorq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
veorq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
veorq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
veorq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
veorq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
veorq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
veorq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
veorq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a ^ __b;
}
   1620 
/* vbic_* / vbicq_*: bit clear — lane-wise __a AND NOT __b, i.e. every
   bit set in __b is cleared in __a.  Implemented with GCC's
   vector-extension `&' and `~' operators.  The 64x1 variants operate
   on scalar 64-bit integers.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbic_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbic_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbic_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbic_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbic_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbic_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbic_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbic_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbicq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbicq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbicq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbicq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & ~__b;
}
   1716 
/* vorn_* / vornq_*: OR-NOT — lane-wise __a OR NOT __b, via GCC's
   vector-extension `|' and `~' operators.  The 64x1 variants operate
   on scalar 64-bit integers.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorn_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorn_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorn_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorn_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorn_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorn_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorn_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorn_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vornq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vornq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vornq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vornq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vornq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vornq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vornq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vornq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | ~__b;
}
   1812 
/* vsub_* / vsubq_*: lane-wise subtraction, via GCC's vector-extension
   `-' operator (integer lanes wrap modulo the lane width).  The 64x1
   and f64x1 variants operate on scalar values (see the typedefs at
   the top of this file).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vsub_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vsub_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsubq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsubq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a - __b;
}
   1920 
   1921 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   1922 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
   1923 {
   1924   return __a - __b;
   1925 }
   1926 
   1927 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   1928 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
   1929 {
   1930   return __a - __b;
   1931 }
   1932 
/* vsubl/vsubl_high: widening subtract (SSUBL/USUBL).  Both operands are
   narrow vectors; each result lane is double the input element width, as
   the signatures show.  The unsigned variants cast their arguments to the
   signed vector types the __builtin_aarch64_usubl* builtins are declared
   with and cast the result back; the casts are pure reinterpretations.
   The *_high_* forms take full 128-bit vectors and (per the ACLE
   vsubl_high contract, matched by the "2"-suffixed builtin names) operate
   on the upper halves of their inputs.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}
   2010 
/* vsubw/vsubw_high: wide subtract (SSUBW/USUBW).  Unlike vsubl, only the
   second operand is narrow: the first operand and the result are the
   wide type, as the signatures show.  Unsigned variants cast through the
   signed vector types expected by the __builtin_aarch64_usubw* builtins.
   The *_high_* forms take a full 128-bit second operand and (per the
   ACLE vsubw_high contract, matched by the "2"-suffixed builtin names)
   use its upper half.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}
   2088 
/* vqadd/vqaddq: saturating addition (SQADD/UQADD per the builtin names;
   results saturate to the element type's range per the ACLE vqadd
   contract).  The s64/u64 non-q forms go through the scalar ..di
   builtins because int64x1_t/uint64x1_t are scalar typedefs in this
   header.  Unsigned variants cast through the signed vector types the
   __builtin_aarch64_uqadd* builtins are declared with; the casts are
   pure reinterpretations.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
						 (int64x1_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}
   2192 
/* vqsub/vqsubq: saturating subtraction (SQSUB/UQSUB per the builtin
   names; results saturate to the element type's range per the ACLE
   vqsub contract).  Mirrors the vqadd family above: scalar ..di
   builtins for the 64x1 types, and reinterpreting casts through the
   signed vector types for the unsigned builtins.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
						 (int64x1_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}
   2296 
/* vqneg/vqnegq: saturating negation (SQNEG per the builtin names;
   per the ACLE vqneg contract the minimum value saturates to the
   maximum rather than wrapping).  Signed element types only.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqneg_s8 (int8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqneg_s16 (int16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqneg_s32 (int32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqnegq_s8 (int8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqnegq_s16 (int16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqnegq_s32 (int32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
}
   2332 
/* vqabs/vqabsq: saturating absolute value (SQABS per the builtin names;
   per the ACLE vqabs contract the minimum value saturates to the
   maximum rather than wrapping).  Signed element types only.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqabs_s8 (int8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqabs_s16 (int16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqabs_s32 (int32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqabsq_s8 (int8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqabsq_s16 (int16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqabsq_s32 (int32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
}
   2368 
/* vqdmulh/vqdmulhq: saturating doubling multiply returning high half
   (SQDMULH per the builtin names; see the ACLE vqdmulh contract).
   Only 16- and 32-bit signed element types are provided.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
}
   2392 
/* vqrdmulh/vqrdmulhq: the rounding variant of vqdmulh above — saturating
   rounding doubling multiply returning high half (SQRDMULH per the
   builtin names; see the ACLE vqrdmulh contract).  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
}
   2416 
/* vcreate: reinterpret the bits of a 64-bit scalar as a 64-bit vector.
   Most variants are plain value casts; vcreate_f64 instead goes through
   __builtin_aarch64_createdf because a direct (float64x1_t) cast of an
   integer would convert the value, not reinterpret the bits
   (float64x1_t is a scalar double typedef in this header).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcreate_s8 (uint64_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcreate_s16 (uint64_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcreate_s32 (uint64_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcreate_s64 (uint64_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcreate_f32 (uint64_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcreate_u8 (uint64_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcreate_u16 (uint64_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcreate_u32 (uint64_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcreate_u64 (uint64_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vcreate_f64 (uint64_t __a)
{
  return (float64x1_t) __builtin_aarch64_createdf (__a);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcreate_p8 (uint64_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vcreate_p16 (uint64_t __a)
{
  return (poly16x4_t) __a;
}
   2488 
   2489 /* vget_lane  */
   2490 
/* vget_lane_*: extract scalar lane __b from a 64-bit vector.  Each
   forwards to the corresponding __aarch64_vget_lane_* macro defined
   earlier in this header; __b is expected to be a constant lane index
   within range for the vector type (enforced by the macro/builtin it
   expands to, not here).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vget_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_f32 (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vget_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_f64 (__a, __b);
}

__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vget_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_p8 (__a, __b);
}

__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vget_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_p16 (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vget_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_s8 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vget_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_s16 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vget_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_s32 (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vget_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_s64 (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vget_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_u8 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vget_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_u16 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vget_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_u32 (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vget_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_u64 (__a, __b);
}
   2562 
   2563 /* vgetq_lane  */
   2564 
/* vgetq_lane_*: 128-bit counterparts of the vget_lane_* intrinsics;
   extract scalar lane __b from a 128-bit vector via the
   __aarch64_vgetq_lane_* macros defined earlier in this header.
   __b is expected to be a constant lane index within range.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vgetq_lane_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_f32 (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vgetq_lane_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_f64 (__a, __b);
}

__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vgetq_lane_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vgetq_lane_p8 (__a, __b);
}

__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vgetq_lane_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_p16 (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vgetq_lane_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s8 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vgetq_lane_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s16 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vgetq_lane_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s32 (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vgetq_lane_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s64 (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vgetq_lane_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u8 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vgetq_lane_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u16 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vgetq_lane_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u32 (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vgetq_lane_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u64 (__a, __b);
}
   2636 
   2637 /* vreinterpret  */
   2638 
   2639 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   2640 vreinterpret_p8_s8 (int8x8_t __a)
   2641 {
   2642   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
   2643 }
   2644 
   2645 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   2646 vreinterpret_p8_s16 (int16x4_t __a)
   2647 {
   2648   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
   2649 }
   2650 
   2651 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   2652 vreinterpret_p8_s32 (int32x2_t __a)
   2653 {
   2654   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
   2655 }
   2656 
   2657 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   2658 vreinterpret_p8_s64 (int64x1_t __a)
   2659 {
   2660   return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
   2661 }
   2662 
   2663 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   2664 vreinterpret_p8_f32 (float32x2_t __a)
   2665 {
   2666   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
   2667 }
   2668 
   2669 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   2670 vreinterpret_p8_u8 (uint8x8_t __a)
   2671 {
   2672   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
   2673 }
   2674 
   2675 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   2676 vreinterpret_p8_u16 (uint16x4_t __a)
   2677 {
   2678   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
   2679 }
   2680 
   2681 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   2682 vreinterpret_p8_u32 (uint32x2_t __a)
   2683 {
   2684   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
   2685 }
   2686 
   2687 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   2688 vreinterpret_p8_u64 (uint64x1_t __a)
   2689 {
   2690   return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
   2691 }
   2692 
   2693 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   2694 vreinterpret_p8_p16 (poly16x4_t __a)
   2695 {
   2696   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
   2697 }
   2698 
/* vreinterpretq_p8_*: reinterpret the bits of a 128-bit (Q-register)
   vector as poly8x16_t.  Pure bit reinterpretation; unsigned/poly
   inputs are first cast to the signed vector mode of the builtin.  */

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s8 (int8x16_t __a)
{
  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s16 (int16x8_t __a)
{
  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s32 (int32x4_t __a)
{
  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s64 (int64x2_t __a)
{
  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_f32 (float32x4_t __a)
{
  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u8 (uint8x16_t __a)
{
  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
							       __a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u16 (uint16x8_t __a)
{
  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
							      __a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u32 (uint32x4_t __a)
{
  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
							      __a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u64 (uint64x2_t __a)
{
  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
							      __a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_p16 (poly16x8_t __a)
{
  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
							      __a);
}
   2763 
/* vreinterpret_p16_*: reinterpret the bits of a 64-bit vector as
   poly16x4_t.  Pure bit reinterpretation; unsigned/poly inputs are
   first cast to the signed vector mode of the builtin.  */

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s8 (int8x8_t __a)
{
  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s16 (int16x4_t __a)
{
  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s32 (int32x2_t __a)
{
  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s64 (int64x1_t __a)
{
  return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_f32 (float32x2_t __a)
{
  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u8 (uint8x8_t __a)
{
  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u16 (uint16x4_t __a)
{
  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u32 (uint32x2_t __a)
{
  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u64 (uint64x1_t __a)
{
  return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_p8 (poly8x8_t __a)
{
  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
}
   2823 
/* vreinterpretq_p16_*: reinterpret the bits of a 128-bit vector as
   poly16x8_t.  Pure bit reinterpretation; unsigned/poly inputs are
   first cast to the signed vector mode of the builtin.  */

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s8 (int8x16_t __a)
{
  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s16 (int16x8_t __a)
{
  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s32 (int32x4_t __a)
{
  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s64 (int64x2_t __a)
{
  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_f32 (float32x4_t __a)
{
  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u8 (uint8x16_t __a)
{
  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u16 (uint16x8_t __a)
{
  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u32 (uint32x4_t __a)
{
  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u64 (uint64x2_t __a)
{
  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_p8 (poly8x16_t __a)
{
  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
							      __a);
}
   2885 
/* vreinterpret_f32_*: reinterpret the bits of a 64-bit vector as
   float32x2_t.  The bit pattern is preserved unchanged — integer bits
   become the raw IEEE-754 encoding of the lanes, with no numeric
   conversion.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s8 (int8x8_t __a)
{
  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s16 (int16x4_t __a)
{
  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s32 (int32x2_t __a)
{
  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s64 (int64x1_t __a)
{
  return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u8 (uint8x8_t __a)
{
  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u16 (uint16x4_t __a)
{
  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
							      __a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u32 (uint32x2_t __a)
{
  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
							      __a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u64 (uint64x1_t __a)
{
  return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p8 (poly8x8_t __a)
{
  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p16 (poly16x4_t __a)
{
  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
							      __a);
}
   2948 
/* vreinterpretq_f32_*: reinterpret the bits of a 128-bit vector as
   float32x4_t.  The bit pattern is preserved unchanged — no numeric
   conversion takes place.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s8 (int8x16_t __a)
{
  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s16 (int16x8_t __a)
{
  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s32 (int32x4_t __a)
{
  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s64 (int64x2_t __a)
{
  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u8 (uint8x16_t __a)
{
  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
							       __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u16 (uint16x8_t __a)
{
  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
							      __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u32 (uint32x4_t __a)
{
  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
							      __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u64 (uint64x2_t __a)
{
  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
							      __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p8 (poly8x16_t __a)
{
  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
							       __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p16 (poly16x8_t __a)
{
  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
							      __a);
}
   3014 
/* vreinterpret_s64_*: reinterpret the bits of a 64-bit vector as
   int64x1_t.  Note int64x1_t is a scalar typedef (int64_t) in this
   header, so the D-register builtins use the plain DI mode
   (…reinterpretdi…) rather than a vector mode.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s8 (int8x8_t __a)
{
  return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s16 (int16x4_t __a)
{
  return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s32 (int32x2_t __a)
{
  return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_f32 (float32x2_t __a)
{
  return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u8 (uint8x8_t __a)
{
  return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u16 (uint16x4_t __a)
{
  return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u32 (uint32x2_t __a)
{
  return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u64 (uint64x1_t __a)
{
  return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_p8 (poly8x8_t __a)
{
  return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_p16 (poly16x4_t __a)
{
  return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
}
   3074 
/* vreinterpretq_s64_*: reinterpret the bits of a 128-bit vector as
   int64x2_t.  Pure bit reinterpretation; unsigned/poly inputs are
   first cast to the signed vector mode of the builtin.  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s8 (int8x16_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s16 (int16x8_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s32 (int32x4_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f32 (float32x4_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u8 (uint8x16_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u16 (uint16x8_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u32 (uint32x4_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u64 (uint64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p8 (poly8x16_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p16 (poly16x8_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
}
   3134 
/* vreinterpret_u64_*: reinterpret the bits of a 64-bit vector as
   uint64x1_t.  Like the s64 family, uint64x1_t is a scalar typedef,
   so the builtins use the plain DI mode (…reinterpretdi…).  */

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s8 (int8x8_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s16 (int16x4_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s32 (int32x2_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s64 (int64x1_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f32 (float32x2_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u8 (uint8x8_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u16 (uint16x4_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u32 (uint32x2_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p8 (poly8x8_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p16 (poly16x4_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
}
   3194 
/* vreinterpretq_u64_*: reinterpret the bits of a 128-bit vector as
   uint64x2_t.  Pure bit reinterpretation via the signed V2DI
   builtins, with the result cast to the unsigned vector type.  */

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s8 (int8x16_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s16 (int16x8_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s32 (int32x4_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s64 (int64x2_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f32 (float32x4_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u8 (uint8x16_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u16 (uint16x8_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u32 (uint32x4_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p8 (poly8x16_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p16 (poly16x8_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
}
   3256 
/* vreinterpret_s8_*: reinterpret the bits of a 64-bit vector as
   int8x8_t.  Pure bit reinterpretation; unsigned/poly inputs are
   first cast to the signed vector mode of the builtin.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s16 (int16x4_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s32 (int32x2_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s64 (int64x1_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f32 (float32x2_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u8 (uint8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u16 (uint16x4_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u32 (uint32x2_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u64 (uint64x1_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p8 (poly8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p16 (poly16x4_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
}
   3316 
/* vreinterpretq_s8_*: reinterpret the bits of a 128-bit vector as
   int8x16_t.  Pure bit reinterpretation; unsigned/poly inputs are
   first cast to the signed vector mode of the builtin.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s16 (int16x8_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s32 (int32x4_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s64 (int64x2_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f32 (float32x4_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u8 (uint8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u16 (uint16x8_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u32 (uint32x4_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u64 (uint64x2_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p8 (poly8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p16 (poly16x8_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
}
   3378 
/* vreinterpret_s16_*: reinterpret the bits of a 64-bit vector as
   int16x4_t.  Pure bit reinterpretation; unsigned/poly inputs are
   first cast to the signed vector mode of the builtin.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s8 (int8x8_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s32 (int32x2_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s64 (int64x1_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f32 (float32x2_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u8 (uint8x8_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u16 (uint16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u32 (uint32x2_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u64 (uint64x1_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p8 (poly8x8_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p16 (poly16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
}
   3438 
/* vreinterpretq_s16_*: reinterpret the bits of a 128-bit vector as
   int16x8_t.  Pure bit reinterpretation; unsigned/poly inputs are
   first cast to the signed vector mode of the builtin.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s8 (int8x16_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s32 (int32x4_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s64 (int64x2_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f32 (float32x4_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u8 (uint8x16_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u16 (uint16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u32 (uint32x4_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u64 (uint64x2_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p8 (poly8x16_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p16 (poly16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
}
   3498 
/* vreinterpret_s32_*: reinterpret the bits of a 64-bit vector as
   int32x2_t.  Pure bit reinterpretation; unsigned/poly inputs are
   first cast to the signed vector mode of the builtin.  */

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s8 (int8x8_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s16 (int16x4_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s64 (int64x1_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f32 (float32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u8 (uint8x8_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u16 (uint16x4_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u32 (uint32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u64 (uint64x1_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p8 (poly8x8_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p16 (poly16x4_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
}
   3558 
/* vreinterpretq_s32_*: bitcast a 128-bit vector to int32x4_t.  Unsigned
   and poly inputs are first cast to the corresponding signed vector type
   expected by the builtin.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s8 (int8x16_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s16 (int16x8_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s64 (int64x2_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f32 (float32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u8 (uint8x16_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u16 (uint16x8_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u32 (uint32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u64 (uint64x2_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p8 (poly8x16_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p16 (poly16x8_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
}
   3618 
/* vreinterpret_u8_*: bitcast a 64-bit vector to uint8x8_t.  Unsigned and
   poly inputs are first cast to the signed vector type the builtin
   expects; the result is then cast to the unsigned return type.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s8 (int8x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s16 (int16x4_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s32 (int32x2_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s64 (int64x1_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f32 (float32x2_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u16 (uint16x4_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u32 (uint32x2_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u64 (uint64x1_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p8 (poly8x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p16 (poly16x4_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
}
   3678 
/* vreinterpretq_u8_*: bitcast a 128-bit vector to uint8x16_t, via the
   signed-vector reinterpret builtins as above.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s8 (int8x16_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s16 (int16x8_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s32 (int32x4_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s64 (int64x2_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f32 (float32x4_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u16 (uint16x8_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
							      __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u32 (uint32x4_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
							      __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u64 (uint64x2_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
							      __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p8 (poly8x16_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
							       __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p16 (poly16x8_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
							      __a);
}
   3743 
/* vreinterpret_u16_*: bitcast a 64-bit vector to uint16x4_t, via the
   signed-vector reinterpret builtins as above.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s8 (int8x8_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s16 (int16x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s32 (int32x2_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s64 (int64x1_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f32 (float32x2_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u8 (uint8x8_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u32 (uint32x2_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u64 (uint64x1_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p8 (poly8x8_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p16 (poly16x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
}
   3803 
/* vreinterpretq_u16_*: bitcast a 128-bit vector to uint16x8_t, via the
   signed-vector reinterpret builtins as above.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s8 (int8x16_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s16 (int16x8_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s32 (int32x4_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s64 (int64x2_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f32 (float32x4_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u8 (uint8x16_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u32 (uint32x4_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u64 (uint64x2_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p8 (poly8x16_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p16 (poly16x8_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
}
   3865 
/* vreinterpret_u32_*: bitcast a 64-bit vector to uint32x2_t, via the
   signed-vector reinterpret builtins as above.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s8 (int8x8_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s16 (int16x4_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s32 (int32x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s64 (int64x1_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f32 (float32x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u8 (uint8x8_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u16 (uint16x4_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u64 (uint64x1_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p8 (poly8x8_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p16 (poly16x4_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
}
   3925 
/* vreinterpretq_u32_*: bitcast a 128-bit vector to uint32x4_t, via the
   signed-vector reinterpret builtins as above.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s8 (int8x16_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s16 (int16x8_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s32 (int32x4_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s64 (int64x2_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f32 (float32x4_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u8 (uint8x16_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u16 (uint16x8_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u64 (uint64x2_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p8 (poly8x16_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p16 (poly16x8_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
}
   3987 
   3988 #define __GET_LOW(__TYPE) \
   3989   uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
   3990   uint64_t lo = vgetq_lane_u64 (tmp, 0);  \
   3991   return vreinterpret_##__TYPE##_u64 (lo);
   3992 
/* vget_low_*: return the low (least-significant) 64-bit half of a
   128-bit vector.  Most variants use the __GET_LOW helper macro; the
   64-bit-element variants return the lane directly because their result
   types are scalar typedefs in this header (int64x1_t is int64_t,
   float64x1_t is double; uint64x1_t presumably likewise).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_low_f32 (float32x4_t __a)
{
  __GET_LOW (f32);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vget_low_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__a, 0);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vget_low_p8 (poly8x16_t __a)
{
  __GET_LOW (p8);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vget_low_p16 (poly16x8_t __a)
{
  __GET_LOW (p16);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_low_s8 (int8x16_t __a)
{
  __GET_LOW (s8);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vget_low_s16 (int16x8_t __a)
{
  __GET_LOW (s16);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vget_low_s32 (int32x4_t __a)
{
  __GET_LOW (s32);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_low_s64 (int64x2_t __a)
{
  return vgetq_lane_s64 (__a, 0);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vget_low_u8 (uint8x16_t __a)
{
  __GET_LOW (u8);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vget_low_u16 (uint16x8_t __a)
{
  __GET_LOW (u16);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vget_low_u32 (uint32x4_t __a)
{
  __GET_LOW (u32);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vget_low_u64 (uint64x2_t __a)
{
  return vgetq_lane_u64 (__a, 0);
}
   4064 
   4065 #undef __GET_LOW
   4066 
/* vcombine_*: concatenate two 64-bit vectors into one 128-bit vector,
   with __a in the low half and __b in the high half, via the
   __builtin_aarch64_combine* builtins.  Unsigned and poly operands are
   cast to the signed types the builtins expect.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcombine_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcombine_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcombine_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcombine_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcombine_f32 (float32x2_t __a, float32x2_t __b)
{
  return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
						     (int8x8_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
						     (int16x4_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
						     (int32x2_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
						   (int64x1_t) __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcombine_f64 (float64x1_t __a, float64x1_t __b)
{
  return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
						     (int8x8_t) __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
{
  return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
						     (int16x4_t) __b);
}
   4144 
   4145 /* Start of temporary inline asm implementations.  */
   4146 
/* vaba_*: per-lane absolute-difference-and-accumulate on 64-bit vectors,
   emitted as the SABA (signed) / UABA (unsigned) instructions via inline
   asm.  The "0"(a) constraint ties the accumulator input to the same
   register as the result (%0), as the instruction reads and writes it.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("saba %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("saba %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("saba %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("uaba %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("uaba %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("uaba %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   4212 
/* vabal_high_*: widening absolute-difference-and-accumulate using the
   HIGH halves of the 128-bit b/c operands (SABAL2/UABAL2).  The "0"(a)
   constraint ties the accumulator input to the result register.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   4278 
/* vabal_*: widening absolute-difference-and-accumulate of 64-bit b/c
   operands into a 128-bit accumulator (SABAL/UABAL).  The "0"(a)
   constraint ties the accumulator input to the result register.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("sabal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("sabal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("sabal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("uabal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("uabal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("uabal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   4344 
/* vabaq_*: per-lane absolute-difference-and-accumulate on full 128-bit
   vectors (SABA/UABA, quad forms).  The "0"(a) constraint ties the
   accumulator input to the result register.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("saba %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("saba %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("saba %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("uaba %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("uaba %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("uaba %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   4410 
/* vabd_<t>: vector absolute difference, 64-bit vectors.
   Each lane of the result is |a - b|; FABD for float lanes,
   SABD/UABD for signed/unsigned integer lanes.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vabd_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fabd %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabd_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("sabd %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabd_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("sabd %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabd_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("sabd %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vabd_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uabd %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vabd_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uabd %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vabd_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uabd %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   4487 
   4488 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
   4489 vabdd_f64 (float64_t a, float64_t b)
   4490 {
   4491   float64_t result;
   4492   __asm__ ("fabd %d0, %d1, %d2"
   4493            : "=w"(result)
   4494            : "w"(a), "w"(b)
   4495            : /* No clobbers */);
   4496   return result;
   4497 }
   4498 
/* vabdl_high_<t>: widening absolute difference of the high halves.
   Each result lane is |a - b| computed on the upper halves of the
   128-bit inputs, widened to twice the element size
   (SABDL2 signed, UABDL2 unsigned).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdl_high_s8 (int8x16_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdl_high_s16 (int16x8_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabdl_high_s32 (int32x4_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   4564 
/* vabdl_<t>: widening absolute difference, long.  Each result lane is
   |a - b| with the 64-bit vector inputs widened to twice the element
   size (SABDL signed, UABDL unsigned).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdl_s8 (int8x8_t a, int8x8_t b)
{
  int16x8_t result;
  __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdl_s16 (int16x4_t a, int16x4_t b)
{
  int32x4_t result;
  __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabdl_s32 (int32x2_t a, int32x2_t b)
{
  int64x2_t result;
  __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdl_u8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t result;
  __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdl_u16 (uint16x4_t a, uint16x4_t b)
{
  uint32x4_t result;
  __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabdl_u32 (uint32x2_t a, uint32x2_t b)
{
  uint64x2_t result;
  __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   4630 
/* vabdq_<t>: vector absolute difference, 128-bit vectors.
   Each lane of the result is |a - b| (FABD / SABD / UABD).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabdq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabdq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fabd %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabdq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sabd %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sabd %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vabdq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uabd %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uabd %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   4718 
   4719 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
   4720 vabds_f32 (float32_t a, float32_t b)
   4721 {
   4722   float32_t result;
   4723   __asm__ ("fabd %s0, %s1, %s2"
   4724            : "=w"(result)
   4725            : "w"(a), "w"(b)
   4726            : /* No clobbers */);
   4727   return result;
   4728 }
   4729 
/* vaddlv_<t>: widening add across all lanes of a 64-bit vector,
   producing one scalar of twice the element width
   (SADDLV signed, UADDLV unsigned).  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlv_s8 (int8x8_t a)
{
  int16_t result;
  __asm__ ("saddlv %h0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddlv_s16 (int16x4_t a)
{
  int32_t result;
  __asm__ ("saddlv %s0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddlv_u8 (uint8x8_t a)
{
  uint16_t result;
  __asm__ ("uaddlv %h0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddlv_u16 (uint16x4_t a)
{
  uint32_t result;
  __asm__ ("uaddlv %s0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   4773 
/* vaddlvq_<t>: widening add across all lanes of a 128-bit vector,
   producing one scalar of twice the element width
   (SADDLV signed, UADDLV unsigned).  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlvq_s8 (int8x16_t a)
{
  int16_t result;
  __asm__ ("saddlv %h0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddlvq_s16 (int16x8_t a)
{
  int32_t result;
  __asm__ ("saddlv %s0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlvq_s32 (int32x4_t a)
{
  int64_t result;
  __asm__ ("saddlv %d0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddlvq_u8 (uint8x16_t a)
{
  uint16_t result;
  __asm__ ("uaddlv %h0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddlvq_u16 (uint16x8_t a)
{
  uint32_t result;
  __asm__ ("uaddlv %s0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddlvq_u32 (uint32x4_t a)
{
  uint64_t result;
  __asm__ ("uaddlv %d0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   4839 
/* vcls(q)_<t>: count leading sign bits per lane (CLS) -- the number
   of consecutive bits, starting just below the sign bit, that match
   the sign bit.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcls_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("cls %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcls_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("cls %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcls_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("cls %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclsq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("cls %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclsq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("cls %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclsq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("cls %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   4905 
/* vcnt(q)_<t>: population count (number of set bits) per byte lane
   (CNT).  Defined only for 8-bit element types.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcnt_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcnt_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcnt_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcntq_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcntq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcntq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   4971 
/* vcopyq_lane_<t> (a, b, c, d): insert lane d of vector c into lane b
   of vector a (INS), returning the modified vector.  These must be
   macros rather than functions because the lane numbers b and d are
   encoded into the instruction and therefore must be compile-time
   constants (the "i" constraints).  */
#define vcopyq_lane_f32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t c_ = (c);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_f64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t c_ = (c);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t c_ = (c);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t c_ = (c);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t c_ = (c);                                              \
       int8x16_t a_ = (a);                                              \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t c_ = (c);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t c_ = (c);                                             \
       uint8x16_t a_ = (a);                                             \
       uint8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t c_ = (c);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   5127 
   5128 /* vcvt_f16_f32 not supported */
   5129 
   5130 /* vcvt_f32_f16 not supported */
   5131 
   5132 /* vcvt_high_f16_f32 not supported */
   5133 
   5134 /* vcvt_high_f32_f16 not supported */
   5135 
   5136 static float32x2_t vdup_n_f32 (float32_t);
   5137 
/* vcvt_n_* (a, b): fixed-point <-> floating-point conversion with b
   fraction bits (SCVTF/UCVTF int->float, FCVTZS/FCVTZU float->int,
   rounding toward zero).  Macros because b is encoded as an immediate
   in the instruction (the "i" constraint) and must be a compile-time
   constant.  */
#define vcvt_n_f32_s32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t a_ = (a);                                              \
       float32x2_t result;                                              \
       __asm__ ("scvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvt_n_f32_u32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t a_ = (a);                                             \
       float32x2_t result;                                              \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvt_n_s32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t a_ = (a);                                            \
       int32x2_t result;                                                \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvt_n_u32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t a_ = (a);                                            \
       uint32x2_t result;                                               \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   5185 
/* vcvtd_n_*: scalar fixed-point <-> float64 conversions operating on
   a single D register (%d0/%d1 operand modifiers).  B is the number
   of fractional bits and must be a compile-time constant.  */

/* Signed 64-bit fixed-point -> float64 (SCVTF).  */
#define vcvtd_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64_t a_ = (a);                                                \
       float64_t result;                                                \
       __asm__ ("scvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Unsigned 64-bit fixed-point -> float64 (UCVTF).  */
#define vcvtd_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64_t a_ = (a);                                               \
       float64_t result;                                                \
       __asm__ ("ucvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* float64 -> signed 64-bit fixed-point, rounding toward zero
   (FCVTZS).  */
#define vcvtd_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       int64_t result;                                                  \
       __asm__ ("fcvtzs %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* float64 -> unsigned 64-bit fixed-point, rounding toward zero
   (FCVTZU).  */
#define vcvtd_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       uint64_t result;                                                 \
       __asm__ ("fcvtzu %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   5233 
/* vcvtq_n_*: fixed-point <-> floating-point conversions on 128-bit
   (quad) vectors, 4 x 32-bit or 2 x 64-bit lanes.  B is the number
   of fractional bits and must be a compile-time constant.  */

/* int32x4 fixed-point -> float32x4 (SCVTF).  */
#define vcvtq_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       float32x4_t result;                                              \
       __asm__ ("scvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* uint32x4 fixed-point -> float32x4 (UCVTF).  */
#define vcvtq_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       float32x4_t result;                                              \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* int64x2 fixed-point -> float64x2 (SCVTF).  */
#define vcvtq_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       float64x2_t result;                                              \
       __asm__ ("scvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* uint64x2 fixed-point -> float64x2 (UCVTF).  */
#define vcvtq_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       float64x2_t result;                                              \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* float32x4 -> int32x4 fixed-point, round toward zero (FCVTZS).  */
#define vcvtq_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t a_ = (a);                                            \
       int32x4_t result;                                                \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* float64x2 -> int64x2 fixed-point, round toward zero (FCVTZS).  */
#define vcvtq_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t a_ = (a);                                            \
       int64x2_t result;                                                \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* float32x4 -> uint32x4 fixed-point, round toward zero (FCVTZU).  */
#define vcvtq_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t a_ = (a);                                            \
       uint32x4_t result;                                               \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* float64x2 -> uint64x2 fixed-point, round toward zero (FCVTZU).  */
#define vcvtq_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t a_ = (a);                                            \
       uint64x2_t result;                                               \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   5329 
/* vcvts_n_*: scalar fixed-point <-> float32 conversions operating on
   a single S register (%s0/%s1 operand modifiers).  B is the number
   of fractional bits and must be a compile-time constant.  */

/* Signed 32-bit fixed-point -> float32 (SCVTF).  */
#define vcvts_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32_t a_ = (a);                                                \
       float32_t result;                                                \
       __asm__ ("scvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Unsigned 32-bit fixed-point -> float32 (UCVTF).  */
#define vcvts_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32_t a_ = (a);                                               \
       float32_t result;                                                \
       __asm__ ("ucvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* float32 -> signed 32-bit fixed-point, round toward zero
   (FCVTZS).  */
#define vcvts_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       int32_t result;                                                  \
       __asm__ ("fcvtzs %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* float32 -> unsigned 32-bit fixed-point, round toward zero
   (FCVTZU).  */
#define vcvts_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       uint32_t result;                                                 \
       __asm__ ("fcvtzu %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   5377 
/* vcvtx_f32_f64: narrow each float64 lane of A to float32 using
   FCVTXN ("convert, inexact" — the round-to-odd narrowing form,
   which avoids double-rounding if the result is rounded again).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvtx_f32_f64 (float64x2_t a)
{
  float32x2_t result;
  __asm__ ("fcvtxn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   5388 
/* vcvtx_high_f32_f64: narrow the float64 lanes of B (FCVTXN2,
   round-to-odd) into the high half of the result, keeping A as the
   low half.  The "0"(a) constraint ties A to the output register so
   FCVTXN2 only overwrites the upper two lanes.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
{
  float32x4_t result;
  __asm__ ("fcvtxn2 %0.4s,%1.2d"
           : "=w"(result)
           : "w" (b), "0"(a)
           : /* No clobbers */);
  return result;
}
   5399 
/* vcvtxd_f32_f64: scalar variant of vcvtx_f32_f64 — narrow a single
   float64 to float32 with FCVTXN (round-to-odd).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtxd_f32_f64 (float64_t a)
{
  float32_t result;
  __asm__ ("fcvtxn %s0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   5410 
/* vext_*: extract a 64-bit vector from the byte-wise concatenation
   of A and B, starting at element index C of A (EXT instruction).
   EXT takes a byte offset, so for multi-byte elements the asm
   multiplies the element index by the element size in the immediate
   expression (e.g. "#%3*4" for 32-bit lanes).  C must be a
   compile-time constant.  */

/* 2 x float32 lanes; byte offset = C * 4.  */
#define vext_f32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 1 x float64 lane; byte offset = C * 8 (only C == 0 fits in the
   64-bit register — presumably kept for API completeness; verify
   against intrinsic spec).  */
#define vext_f64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64x1_t a_ = (a);                                            \
       float64x1_t result;                                              \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 8 x poly8 lanes; byte offset = C (element size is one byte).  */
#define vext_p8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 4 x poly16 lanes; byte offset = C * 2.  */
#define vext_p16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 8 x int8 lanes; byte offset = C.  */
#define vext_s8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8x8_t a_ = (a);                                               \
       int8x8_t result;                                                 \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 4 x int16 lanes; byte offset = C * 2.  */
#define vext_s16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 2 x int32 lanes; byte offset = C * 4.  */
#define vext_s32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 1 x int64 lane; byte offset = C * 8.  */
#define vext_s64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64x1_t a_ = (a);                                              \
       int64x1_t result;                                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 8 x uint8 lanes; byte offset = C.  */
#define vext_u8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x8_t result;                                                \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 4 x uint16 lanes; byte offset = C * 2.  */
#define vext_u16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 2 x uint32 lanes; byte offset = C * 4.  */
#define vext_u32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 1 x uint64 lane; byte offset = C * 8.  */
#define vext_u64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64x1_t a_ = (a);                                             \
       uint64x1_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   5566 
/* vextq_*: 128-bit (quad) variants of vext_* — extract a vector from
   the byte-wise concatenation of A and B starting at element index C
   of A (EXT on the full 16-byte registers).  As above, the asm
   converts the element index to a byte offset by multiplying in the
   immediate expression.  C must be a compile-time constant.  */

/* 4 x float32 lanes; byte offset = C * 4.  */
#define vextq_f32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 2 x float64 lanes; byte offset = C * 8.  */
#define vextq_f64(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 16 x poly8 lanes; byte offset = C (element size is one byte).  */
#define vextq_p8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 8 x poly16 lanes; byte offset = C * 2.  */
#define vextq_p16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 16 x int8 lanes; byte offset = C.  */
#define vextq_s8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8x16_t a_ = (a);                                              \
       int8x16_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 8 x int16 lanes; byte offset = C * 2.  */
#define vextq_s16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 4 x int32 lanes; byte offset = C * 4.  */
#define vextq_s32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   5657 
   5658 #define vextq_s64(a, b, c)                                              \
   5659   __extension__                                                         \
   5660     ({                                                                  \
   5661        int64x2_t b_ = (b);                                              \
   5662        int64x2_t a_ = (a);                                              \
   5663        int64x2_t result;                                                \
   5664        __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
   5665                 : "=w"(result)                                          \
   5666                 : "w"(a_), "w"(b_), "i"(c)                              \
   5667                 : /* No clobbers */);                                   \
   5668        result;                                                          \
   5669      })
   5670 
   5671 #define vextq_u8(a, b, c)                                               \
   5672   __extension__                                                         \
   5673     ({                                                                  \
   5674        uint8x16_t b_ = (b);                                             \
   5675        uint8x16_t a_ = (a);                                             \
   5676        uint8x16_t result;                                               \
   5677        __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
   5678                 : "=w"(result)                                          \
   5679                 : "w"(a_), "w"(b_), "i"(c)                              \
   5680                 : /* No clobbers */);                                   \
   5681        result;                                                          \
   5682      })
   5683 
   5684 #define vextq_u16(a, b, c)                                              \
   5685   __extension__                                                         \
   5686     ({                                                                  \
   5687        uint16x8_t b_ = (b);                                             \
   5688        uint16x8_t a_ = (a);                                             \
   5689        uint16x8_t result;                                               \
   5690        __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
   5691                 : "=w"(result)                                          \
   5692                 : "w"(a_), "w"(b_), "i"(c)                              \
   5693                 : /* No clobbers */);                                   \
   5694        result;                                                          \
   5695      })
   5696 
   5697 #define vextq_u32(a, b, c)                                              \
   5698   __extension__                                                         \
   5699     ({                                                                  \
   5700        uint32x4_t b_ = (b);                                             \
   5701        uint32x4_t a_ = (a);                                             \
   5702        uint32x4_t result;                                               \
   5703        __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
   5704                 : "=w"(result)                                          \
   5705                 : "w"(a_), "w"(b_), "i"(c)                              \
   5706                 : /* No clobbers */);                                   \
   5707        result;                                                          \
   5708      })
   5709 
   5710 #define vextq_u64(a, b, c)                                              \
   5711   __extension__                                                         \
   5712     ({                                                                  \
   5713        uint64x2_t b_ = (b);                                             \
   5714        uint64x2_t a_ = (a);                                             \
   5715        uint64x2_t result;                                               \
   5716        __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
   5717                 : "=w"(result)                                          \
   5718                 : "w"(a_), "w"(b_), "i"(c)                              \
   5719                 : /* No clobbers */);                                   \
   5720        result;                                                          \
   5721      })
   5722 
/* vfma_f32: per-lane fused multiply-add, a + b * c (2 x float32).
   FMLA accumulates into its destination register, so the addend a is
   tied to the output via the "0" matching constraint.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
{
  float32x2_t result;
  __asm__ ("fmla %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vfmaq_f32: as vfma_f32, for 4 x float32.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
{
  float32x4_t result;
  __asm__ ("fmla %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vfmaq_f64: as vfma_f32, for 2 x float64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
{
  float64x2_t result;
  __asm__ ("fmla %0.2d,%2.2d,%3.2d"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   5755 
/* vfma_n_f32: fused multiply-add with a scalar multiplier,
   a + b * c per lane (2 x float32).  The scalar c is placed in a SIMD
   register ("w" constraint) and broadcast through the by-element form
   %3.s[0]; a is tied to the output ("0") because FMLA accumulates in
   place.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
{
  float32x2_t result;
  __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vfmaq_n_f32: as vfma_n_f32, for 4 x float32.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
  float32x4_t result;
  __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vfmaq_n_f64: as vfma_n_f32, for 2 x float64 (lane form %3.d[0]).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
{
  float64x2_t result;
  __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   5788 
/* vfms_f32: per-lane fused multiply-subtract, a - b * c (2 x float32).
   FMLS subtracts the product from its destination, so a is tied to the
   output via the "0" matching constraint.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
{
  float32x2_t result;
  __asm__ ("fmls %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vfmsq_f32: as vfms_f32, for 4 x float32.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
{
  float32x4_t result;
  __asm__ ("fmls %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vfmsq_f64: as vfms_f32, for 2 x float64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
{
  float64x2_t result;
  __asm__ ("fmls %0.2d,%2.2d,%3.2d"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   5821 
/* vget_high_<T>: return the high 64-bit half of a 128-bit vector.
   Each variant copies source doubleword lane d[1] into destination
   lane d[0] with INS.  For the 64x1 variants note that float64x1_t,
   int64x1_t and uint64x1_t are scalar typedefs in this header (see the
   typedefs at the top of the file).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_high_f32 (float32x4_t a)
{
  float32x2_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vget_high_f64 (float64x2_t a)
{
  float64x1_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vget_high_p8 (poly8x16_t a)
{
  poly8x8_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vget_high_p16 (poly16x8_t a)
{
  poly16x4_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_high_s8 (int8x16_t a)
{
  int8x8_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vget_high_s16 (int16x8_t a)
{
  int16x4_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vget_high_s32 (int32x4_t a)
{
  int32x2_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_high_s64 (int64x2_t a)
{
  int64x1_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vget_high_u8 (uint8x16_t a)
{
  uint8x8_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vget_high_u16 (uint16x8_t a)
{
  uint16x4_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vget_high_u32 (uint32x4_t a)
{
  uint32x2_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vget_high_u64 (uint64x2_t a)
{
  uint64x1_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   5953 
/* vhsub[q]_<T>: halving subtract, (a - b) >> 1 per lane, computed by
   the hardware without intermediate overflow.  SHSUB is used for
   signed element types and UHSUB for unsigned; the "q" variants
   operate on the full 128-bit registers.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vhsub_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("shsub %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vhsub_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("shsub %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhsub_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("shsub %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhsub_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhsub_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhsub_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhsubq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("shsub %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhsubq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("shsub %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhsubq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("shsub %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhsubq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhsubq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhsubq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   6085 
/* vld1_dup_<T>: load a single element from *a and replicate it to
   every lane of the result (LD1R, load-one-and-replicate).  The "Utv"
   constraint is GCC's AArch64 memory constraint for addresses valid in
   SIMD structure load/store instructions; expressing the load as a
   memory input operand lets the compiler track the dependency on *a.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t * a)
{
  float32x2_t result;
  __asm__ ("ld1r {%0.2s}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_dup_f64 (const float64_t * a)
{
  float64x1_t result;
  __asm__ ("ld1r {%0.1d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_dup_p8 (const poly8_t * a)
{
  poly8x8_t result;
  __asm__ ("ld1r {%0.8b}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_dup_p16 (const poly16_t * a)
{
  poly16x4_t result;
  __asm__ ("ld1r {%0.4h}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_dup_s8 (const int8_t * a)
{
  int8x8_t result;
  __asm__ ("ld1r {%0.8b}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_dup_s16 (const int16_t * a)
{
  int16x4_t result;
  __asm__ ("ld1r {%0.4h}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_dup_s32 (const int32_t * a)
{
  int32x2_t result;
  __asm__ ("ld1r {%0.2s}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_dup_s64 (const int64_t * a)
{
  int64x1_t result;
  __asm__ ("ld1r {%0.1d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_dup_u8 (const uint8_t * a)
{
  uint8x8_t result;
  __asm__ ("ld1r {%0.8b}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_dup_u16 (const uint16_t * a)
{
  uint16x4_t result;
  __asm__ ("ld1r {%0.4h}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_dup_u32 (const uint32_t * a)
{
  uint32x2_t result;
  __asm__ ("ld1r {%0.2s}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_dup_u64 (const uint64_t * a)
{
  uint64x1_t result;
  __asm__ ("ld1r {%0.1d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}
   6217 
/* vld1_lane_<T> (a, b, c): load one element from *a into lane c of
   vector b, leaving all other lanes unchanged; the result is returned.
   The input vector b is tied to the output via the "0" matching
   constraint because LD1 (single structure) only overwrites the
   addressed lane.  c must be an integer constant expression ("i"
   constraint), which is why these are statement-expression macros
   rather than inline functions.  */

#define vld1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x2_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x1_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       const poly8_t * a_ = (a);                                        \
       poly8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       const int8_t * a_ = (a);                                         \
       int8x8_t result;                                                 \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x4_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x2_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x1_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       const uint8_t * a_ = (a);                                        \
       uint8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x2_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x1_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   6373 
/* vld1q_dup_*: load one element from memory at A and replicate it into
   every lane of a 128-bit vector, using the LD1R (load-and-replicate)
   instruction.  One variant per element type; the arrangement specifier
   in the asm string (16b/8h/4s/2d) selects the element width.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * a)
{
  float32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_dup_f64 (const float64_t * a)
{
  float64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_dup_p8 (const poly8_t * a)
{
  poly8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_dup_p16 (const poly16_t * a)
{
  poly16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_dup_s8 (const int8_t * a)
{
  int8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_dup_s16 (const int16_t * a)
{
  int16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_dup_s32 (const int32_t * a)
{
  int32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_dup_s64 (const int64_t * a)
{
  int64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_dup_u8 (const uint8_t * a)
{
  uint8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_dup_u16 (const uint16_t * a)
{
  uint16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_dup_u32 (const uint32_t * a)
{
  uint32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_dup_u64 (const uint64_t * a)
{
  uint64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}
   6505 
/* vld1q_lane_*: load a single element from memory at A into lane C of
   the existing 128-bit vector B, leaving all other lanes unchanged.
   Macros (not functions) because the lane index C must be a
   compile-time constant (asm "i" constraint).  */

#define vld1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x4_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x2_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       const poly8_t * a_ = (a);                                        \
       poly8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       const int8_t * a_ = (a);                                         \
       int8x16_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x8_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x4_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x2_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       const uint8_t * a_ = (a);                                        \
       uint8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x4_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x2_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   6661 
/* vmla_n_f32: result = a + (b * c), elementwise, with the scalar C
   broadcast from lane 0 of its register.  Deliberately emitted as a
   separate multiply then add (two roundings) rather than a single fused
   FMLA, matching vmla's non-fused semantics; T1 is a scratch register
   holding the intermediate product b * c.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
{
  float32x2_t result;
  float32x2_t t1;
  __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   6673 
/* vmla_n_{s16,s32,u16,u32}: result = a + (b * c), elementwise, with the
   scalar C broadcast via the element-indexed form of MLA (lane 0).  The
   16-bit variants use the "x" constraint because the h-element indexed
   multiply forms can only address SIMD registers v0-v15.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
{
  uint16x4_t result;
  __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   6717 
/* vmla_{s8,s16,s32,u8,u16,u32}: elementwise multiply-accumulate,
   result = a + (b * c), on 64-bit vectors.  The accumulator A is tied
   to the output register via the "0" matching constraint.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   6783 
   6784 #define vmlal_high_lane_s16(a, b, c, d)                                 \
   6785   __extension__                                                         \
   6786     ({                                                                  \
   6787        int16x8_t c_ = (c);                                              \
   6788        int16x8_t b_ = (b);                                              \
   6789        int32x4_t a_ = (a);                                              \
   6790        int32x4_t result;                                                \
   6791        __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
   6792                 : "=w"(result)                                          \
   6793                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
   6794                 : /* No clobbers */);                                   \
   6795        result;                                                          \
   6796      })
   6797 
   6798 #define vmlal_high_lane_s32(a, b, c, d)                                 \
   6799   __extension__                                                         \
   6800     ({                                                                  \
   6801        int32x4_t c_ = (c);                                              \
   6802        int32x4_t b_ = (b);                                              \
   6803        int64x2_t a_ = (a);                                              \
   6804        int64x2_t result;                                                \
   6805        __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
   6806                 : "=w"(result)                                          \
   6807                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
   6808                 : /* No clobbers */);                                   \
   6809        result;                                                          \
   6810      })
   6811 
   6812 #define vmlal_high_lane_u16(a, b, c, d)                                 \
   6813   __extension__                                                         \
   6814     ({                                                                  \
   6815        uint16x8_t c_ = (c);                                             \
   6816        uint16x8_t b_ = (b);                                             \
   6817        uint32x4_t a_ = (a);                                             \
   6818        uint32x4_t result;                                               \
   6819        __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
   6820                 : "=w"(result)                                          \
   6821                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
   6822                 : /* No clobbers */);                                   \
   6823        result;                                                          \
   6824      })
   6825 
   6826 #define vmlal_high_lane_u32(a, b, c, d)                                 \
   6827   __extension__                                                         \
   6828     ({                                                                  \
   6829        uint32x4_t c_ = (c);                                             \
   6830        uint32x4_t b_ = (b);                                             \
   6831        uint64x2_t a_ = (a);                                             \
   6832        uint64x2_t result;                                               \
   6833        __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
   6834                 : "=w"(result)                                          \
   6835                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
   6836                 : /* No clobbers */);                                   \
   6837        result;                                                          \
   6838      })
   6839 
/* vmlal_high_laneq_{s16,s32,u16,u32}: widening multiply-accumulate of
   the high half of B by lane D of the 128-bit ("laneq") vector C,
   accumulating into A (SMLAL2/UMLAL2 with an element-indexed
   multiplier).  The 16-bit variants use the "x" constraint because the
   h-element indexed multiply forms can only address v0-v15.  */

#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   6895 
/* vmlal_high_n_{s16,s32,u16,u32}: widening multiply-accumulate of the
   high half of B by the scalar C (broadcast via lane 0 of its
   register), accumulating into A (SMLAL2/UMLAL2).  The 16-bit variants
   use the "x" constraint because the h-element indexed multiply forms
   can only address v0-v15.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   6939 
/* vmlal_high_{s8,s16,s32,u8,u16,u32}: widening multiply-accumulate
   using the high halves of B and C (SMLAL2/UMLAL2); each product of
   two narrow elements is accumulated into the double-width lanes of
   A, which is tied to the output via the "0" matching constraint.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7005 
/* vmlal_lane_{s16,s32,u16,u32}: widening multiply-accumulate by a
   selected lane of C.  These are macros (statement expressions) rather
   than inline functions because the lane number D must be a
   compile-time constant: it is passed with the "i" (immediate)
   constraint straight into the .h[]/.s[] lane index.  The accumulator
   is tied to the output with "0".  The 16-bit variants constrain C
   with "x" (v0-v15) because the .h[] by-element encoding can only
   address the low 16 vector registers.  */

#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* s32 x s32-lane -> s64.  */
#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* u16 x u16-lane -> u32.  */
#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* u32 x u32-lane -> u64.  */
#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7061 
/* vmlal_laneq_{s16,s32,u16,u32}: like vmlal_lane_* but the lane is
   selected from a full 128-bit ("q") vector C, so the lane index D can
   range over the whole quad register.  D must be a compile-time
   constant ("i" constraint); the accumulator is tied with "0"; the
   16-bit variants need "x" (v0-v15) for the .h[] by-element
   encoding.  */

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* s32 x s32-laneq -> s64.  */
#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* u16 x u16-laneq -> u32.  */
#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* u32 x u32-laneq -> u64.  */
#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7117 
/* vmlal_n_{s16,s32,u16,u32}: widening multiply-accumulate by scalar,
   low half.  SMLAL/UMLAL multiply every lane of the 64-bit vector B by
   scalar C (lane 0 of its register) and accumulate the double-width
   products into A.  A is tied to the output with "0"; the 16-bit
   variants constrain C with "x" (v0-v15) for the .h[] by-element
   encoding.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

/* s32 x scalar -> s64.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u16 x scalar -> u32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

/* u32 x scalar -> u64.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7161 
/* vmlal_{s8,s16,s32,u8,u16,u32}: widening multiply-accumulate, low
   half.  SMLAL/UMLAL multiply corresponding lanes of the 64-bit
   vectors B and C, widening each product to double width, and
   accumulate into A.  A is tied to the output with "0" so the
   accumulation is in place.  */

/* s8 x s8 -> s16.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("smlal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* s16 x s16 -> s32.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("smlal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* s32 x s32 -> s64.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("smlal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u8 x u8 -> u16.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("umlal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u16 x u16 -> u32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("umlal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u32 x u32 -> u64.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("umlal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7227 
/* vmlaq_n_f32: floating-point multiply-accumulate by scalar,
   a + b * c per lane.  Deliberately emitted as a separate FMUL into
   the scratch register T1 followed by FADD (two rounding steps), not a
   fused FMLA — so results match the unfused a + b * c the intrinsic
   promises.  A is tied to the output with "0"; T1 is a second
   write-only output used purely as a temporary.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
  float32x4_t result;
  float32x4_t t1;
  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vmlaq_n_f64: as above for the two f64 lanes (FMUL then FADD via
   scratch T1, unfused).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
{
  float64x2_t result;
  float64x2_t t1;
  __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7251 
/* vmlaq_n_{s16,s32,u16,u32}: integer multiply-accumulate by scalar on
   128-bit vectors, a + b * c per lane (MLA by element, no widening —
   the same instruction serves signed and unsigned).  A is tied to the
   output with "0"; the 16-bit variants constrain C with "x" (v0-v15)
   for the .h[] by-element encoding.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
{
  int16x8_t result;
  __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

/* s32 lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
{
  int32x4_t result;
  __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u16 lanes.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
{
  uint16x8_t result;
  __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

/* u32 lanes.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
{
  uint32x4_t result;
  __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7295 
/* vmlaq_{s8,s16,s32,u8,u16,u32}: lane-wise integer multiply-accumulate
   on 128-bit vectors, a + b * c (MLA, no widening — the same
   instruction serves signed and unsigned).  A is tied to the output
   with "0" so the accumulation happens in place.  */

/* s8 lanes.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("mla %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* s16 lanes.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("mla %0.8h, %2.8h, %3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* s32 lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("mla %0.4s, %2.4s, %3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u8 lanes.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("mla %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u16 lanes.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("mla %0.8h, %2.8h, %3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u32 lanes.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("mla %0.4s, %2.4s, %3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7361 
/* vmls_n_f32: floating-point multiply-subtract by scalar on 64-bit
   vectors, a - b * c per lane.  Deliberately emitted as FMUL into the
   scratch register T1 followed by FSUB (two rounding steps, not a
   fused FMLS), matching the unfused semantics of the intrinsic.  A is
   tied to the output with "0"; T1 is a write-only temporary.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
{
  float32x2_t result;
  float32x2_t t1;
  __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vmls_n_{s16,s32,u16,u32}: integer multiply-subtract by scalar,
   a - b * c per lane (MLS by element).  A is tied to the output with
   "0"; the 16-bit variants constrain C with "x" (v0-v15) for the
   .h[] by-element encoding.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
{
  int16x4_t result;
  __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

/* s32 lanes.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
{
  int32x2_t result;
  __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u16 lanes.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
{
  uint16x4_t result;
  __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

/* u32 lanes.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
{
  uint32x2_t result;
  __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7417 
/* vmls_{s8,s16,s32,u8,u16,u32}: lane-wise integer multiply-subtract on
   64-bit vectors, a - b * c (MLS, no widening — the same instruction
   serves signed and unsigned).  A is tied to the output with "0" so
   the subtraction happens in place.  */

/* s8 lanes.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("mls %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* s16 lanes.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("mls %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* s32 lanes.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("mls %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u8 lanes.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("mls %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u16 lanes.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("mls %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u32 lanes.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("mls %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7483 
/* vmlsl_high_lane_{s16,s32,u16,u32}: widening multiply-subtract by a
   selected lane of C, using the upper half of B (SMLSL2/UMLSL2).
   Macros so the lane index D can be passed as an immediate ("i").
   The accumulator is tied with "0"; 16-bit variants need "x" (v0-v15)
   for the .h[] by-element encoding.
   NOTE(review): these coerce C to a full q-register type (e.g.
   int16x8_t), while ACLE documents the _lane (non-laneq) forms as
   taking a 64-bit lane vector — verify against callers before
   relying on the C argument type here.  */

#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* s32 x s32-lane -> s64, high half.  */
#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* u16 x u16-lane -> u32, high half.  */
#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* u32 x u32-lane -> u64, high half.  */
#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7539 
/* vmlsl_high_laneq_{s16,s32,u16,u32}: widening multiply-subtract by a
   lane selected from a full 128-bit vector C, using the upper half of
   B (SMLSL2/UMLSL2).  Macros so the lane index D is an immediate
   ("i"); the accumulator is tied with "0"; 16-bit variants need "x"
   (v0-v15) for the .h[] by-element encoding.  */

#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* s32 x s32-laneq -> s64, high half.  */
#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* u16 x u16-laneq -> u32, high half.  */
#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* u32 x u32-laneq -> u64, high half.  */
#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7595 
/* vmlsl_high_n_{s16,s32,u16,u32}: widening multiply-subtract by
   scalar, high half.  SMLSL2/UMLSL2 multiply the upper-half lanes of B
   by scalar C (lane 0 of its register) and subtract the double-width
   products from A.  A is tied to the output with "0"; the 16-bit
   variants constrain C with "x" (v0-v15) for the .h[] by-element
   encoding.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

/* s32 x scalar -> s64, high half.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* u16 x scalar -> u32, high half.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

/* u32 x scalar -> u64, high half.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7639 
/* Signed widening multiply-subtract-long of the upper halves:
   a.8h -= widen (b[8..15]) * widen (c[8..15]).  One SMLSL2.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7650 
/* Signed widening multiply-subtract-long of the upper halves:
   a.4s -= widen (b[4..7]) * widen (c[4..7]).  One SMLSL2.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7661 
/* Signed widening multiply-subtract-long of the upper halves:
   a.2d -= widen (b[2..3]) * widen (c[2..3]).  One SMLSL2.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7672 
/* Unsigned widening multiply-subtract-long of the upper halves:
   a.8h -= widen (b[8..15]) * widen (c[8..15]).  One UMLSL2.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7683 
/* Unsigned widening multiply-subtract-long of the upper halves:
   a.4s -= widen (b[4..7]) * widen (c[4..7]).  One UMLSL2.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7694 
/* Unsigned widening multiply-subtract-long of the upper halves:
   a.2d -= widen (b[2..3]) * widen (c[2..3]).  One UMLSL2.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7705 
/* Signed widening multiply-subtract-long by a selected lane:
   a.4s -= widen (b.4h) * c[d].  Macro (not a function) because the
   lane index d must be an "i" (compile-time constant) asm operand.  */
#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7719 
/* Signed widening multiply-subtract-long by a selected lane:
   a.2d -= widen (b.2s) * c[d]; d must be a compile-time constant.  */
#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7733 
/* Unsigned widening multiply-subtract-long by a selected lane:
   a.4s -= widen (b.4h) * c[d]; d must be a compile-time constant.  */
#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7747 
/* Unsigned widening multiply-subtract-long by a selected lane:
   a.2d -= widen (b.2s) * c[d]; d must be a compile-time constant.  */
#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7761 
/* Same as vmlsl_lane_s16 but the lane is taken from a 128-bit
   vector c (laneq variant): a.4s -= widen (b.4h) * c[d].  */
#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7775 
/* Same as vmlsl_lane_s32 but the lane is taken from a 128-bit
   vector c (laneq variant): a.2d -= widen (b.2s) * c[d].  */
#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7789 
/* Same as vmlsl_lane_u16 but the lane is taken from a 128-bit
   vector c (laneq variant): a.4s -= widen (b.4h) * c[d].  */
#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7803 
/* Same as vmlsl_lane_u32 but the lane is taken from a 128-bit
   vector c (laneq variant): a.2d -= widen (b.2s) * c[d].  */
#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   7817 
/* Signed widening multiply-subtract-long by scalar (low half):
   a.4s -= widen (b.4h) * c.  One SMLSL; "x" keeps the 16-bit lane
   operand in the register range the instruction accepts.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}
   7828 
/* Signed widening multiply-subtract-long by scalar (low half):
   a.2d -= widen (b.2s) * c.  One SMLSL instruction.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7839 
/* Unsigned widening multiply-subtract-long by scalar (low half):
   a.4s -= widen (b.4h) * c.  One UMLSL; "x" keeps the 16-bit lane
   operand in the register range the instruction accepts.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}
   7850 
/* Unsigned widening multiply-subtract-long by scalar (low half):
   a.2d -= widen (b.2s) * c.  One UMLSL instruction.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7861 
/* Signed widening multiply-subtract-long:
   a.8h -= widen (b.8b) * widen (c.8b).  One SMLSL.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7872 
/* Signed widening multiply-subtract-long:
   a.4s -= widen (b.4h) * widen (c.4h).  One SMLSL.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7883 
/* Signed widening multiply-subtract-long:
   a.2d -= widen (b.2s) * widen (c.2s).  One SMLSL.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7894 
/* Unsigned widening multiply-subtract-long:
   a.8h -= widen (b.8b) * widen (c.8b).  One UMLSL.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7905 
/* Unsigned widening multiply-subtract-long:
   a.4s -= widen (b.4h) * widen (c.4h).  One UMLSL.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7916 
/* Unsigned widening multiply-subtract-long:
   a.2d -= widen (b.2s) * widen (c.2s).  One UMLSL.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7927 
/* Float multiply-subtract by scalar: result = a - b * c, computed as a
   separate FMUL into temporary t1 followed by FSUB (two roundings, not
   a fused multiply-subtract, matching vmls semantics).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
  float32x4_t result;
  float32x4_t t1;
  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7939 
/* Double multiply-subtract by scalar: result = a - b * c, as a separate
   FMUL into t1 then FSUB (two roundings, not fused).
   NOTE(review): c uses the "x" constraint (the restricted register
   class needed only for 16-bit lane indexing) although this is a
   d-lane reference; "w" would appear sufficient -- confirm.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
{
  float64x2_t result;
  float64x2_t t1;
  __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}
   7951 
/* Integer multiply-subtract by scalar: a.8h -= b.8h * c.  One MLS;
   "x" keeps the 16-bit lane operand in the range MLS accepts.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
{
  int16x8_t result;
  __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}
   7962 
/* Integer multiply-subtract by scalar: a.4s -= b.4s * c.  One MLS.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
{
  int32x4_t result;
  __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7973 
/* Integer multiply-subtract by scalar: a.8h -= b.8h * c.  One MLS;
   "x" keeps the 16-bit lane operand in the range MLS accepts.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
{
  uint16x8_t result;
  __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}
   7984 
/* Integer multiply-subtract by scalar: a.4s -= b.4s * c.  One MLS.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
{
  uint32x4_t result;
  __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   7995 
/* Integer multiply-subtract: a.16b -= b.16b * c.16b.  One MLS.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("mls %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   8006 
/* Integer multiply-subtract: a.8h -= b.8h * c.8h.  One MLS.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("mls %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   8017 
/* Integer multiply-subtract: a.4s -= b.4s * c.4s.  One MLS.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("mls %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   8028 
/* Integer multiply-subtract: a.16b -= b.16b * c.16b.  One MLS.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("mls %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   8039 
/* Integer multiply-subtract: a.8h -= b.8h * c.8h.  One MLS.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("mls %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   8050 
/* Integer multiply-subtract: a.4s -= b.4s * c.4s.  One MLS.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("mls %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   8061 
/* Sign-extend the upper 8 lanes of a to 16 bits, via SSHLL2 with a
   shift of #0 (shift-left-long by zero == widen-only).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovl_high_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("sshll2 %0.8h,%1.16b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8072 
/* Sign-extend the upper 4 lanes of a to 32 bits (SSHLL2 #0).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovl_high_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("sshll2 %0.4s,%1.8h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8083 
/* Sign-extend the upper 2 lanes of a to 64 bits (SSHLL2 #0).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovl_high_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("sshll2 %0.2d,%1.4s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8094 
/* Zero-extend the upper 8 lanes of a to 16 bits (USHLL2 #0).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovl_high_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("ushll2 %0.8h,%1.16b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8105 
/* Zero-extend the upper 4 lanes of a to 32 bits (USHLL2 #0).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovl_high_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("ushll2 %0.4s,%1.8h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8116 
/* Zero-extend the upper 2 lanes of a to 64 bits (USHLL2 #0).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovl_high_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("ushll2 %0.2d,%1.4s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8127 
/* Sign-extend each 8-bit lane of a to 16 bits (SSHLL #0).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovl_s8 (int8x8_t a)
{
  int16x8_t result;
  __asm__ ("sshll %0.8h,%1.8b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8138 
/* Sign-extend each 16-bit lane of a to 32 bits (SSHLL #0).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovl_s16 (int16x4_t a)
{
  int32x4_t result;
  __asm__ ("sshll %0.4s,%1.4h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8149 
/* Sign-extend each 32-bit lane of a to 64 bits (SSHLL #0).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovl_s32 (int32x2_t a)
{
  int64x2_t result;
  __asm__ ("sshll %0.2d,%1.2s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8160 
/* Zero-extend each 8-bit lane of a to 16 bits (USHLL #0).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovl_u8 (uint8x8_t a)
{
  uint16x8_t result;
  __asm__ ("ushll %0.8h,%1.8b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8171 
/* Zero-extend each 16-bit lane of a to 32 bits (USHLL #0).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovl_u16 (uint16x4_t a)
{
  uint32x4_t result;
  __asm__ ("ushll %0.4s,%1.4h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8182 
/* Zero-extend each 32-bit lane of a to 64 bits (USHLL #0).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovl_u32 (uint32x2_t a)
{
  uint64x2_t result;
  __asm__ ("ushll %0.2d,%1.2s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8193 
/* Narrow b into the upper half of a 128-bit vector whose lower half is
   a: the result starts as {a, 0} and XTN2 writes truncated b lanes
   into lanes 8..15.  "+w" because the asm both reads and writes it.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   8204 
/* Narrow b into the upper half of {a, 0} via XTN2 (truncating each
   32-bit lane to 16 bits); "+w" = read-write asm operand.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   8215 
/* Narrow b into the upper half of {a, 0} via XTN2 (truncating each
   64-bit lane to 32 bits); "+w" = read-write asm operand.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   8226 
/* Narrow b into the upper half of {a, 0} via XTN2 (truncating each
   16-bit lane to 8 bits); "+w" = read-write asm operand.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   8237 
/* Narrow b into the upper half of {a, 0} via XTN2 (truncating each
   32-bit lane to 16 bits); "+w" = read-write asm operand.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   8248 
/* Narrow b into the upper half of {a, 0} via XTN2 (truncating each
   64-bit lane to 32 bits); "+w" = read-write asm operand.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   8259 
/* Narrow: truncate each 16-bit lane of a to 8 bits (XTN).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmovn_s16 (int16x8_t a)
{
  int8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8270 
/* Narrow: truncate each 32-bit lane of a to 16 bits (XTN).  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmovn_s32 (int32x4_t a)
{
  int16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8281 
/* Narrow: truncate each 64-bit lane of a to 32 bits (XTN).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmovn_s64 (int64x2_t a)
{
  int32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8292 
/* Narrow: truncate each 16-bit lane of a to 8 bits (XTN).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmovn_u16 (uint16x8_t a)
{
  uint8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8303 
/* Narrow: truncate each 32-bit lane of a to 16 bits (XTN).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmovn_u32 (uint32x4_t a)
{
  uint16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8314 
/* Narrow: truncate each 64-bit lane of a to 32 bits (XTN).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmovn_u64 (uint64x2_t a)
{
  uint32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   8325 
/* Multiply vector by scalar: result.2s = a.2s * b, with b broadcast
   from lane 0 of its register (FMUL by-element form).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t a, float32_t b)
{
  float32x2_t result;
  __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   8336 
/* Multiply vector by scalar: result.4h = a.4h * b (MUL by-element);
   "x" keeps the 16-bit lane operand in the range MUL accepts.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}
   8347 
/* Multiply vector by scalar: result.2s = a.2s * b (MUL by-element).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   8358 
/* Multiply vector by scalar: result.4h = a.4h * b (MUL by-element);
   "x" keeps the 16-bit lane operand in the range MUL accepts.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_n_u16 (uint16x4_t a, uint16_t b)
{
  uint16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}
   8369 
/* Multiply vector by scalar: result.2s = a.2s * b (MUL by-element).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_n_u32 (uint32x2_t a, uint32_t b)
{
  uint32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   8380 
/* Scalar double multiply by a selected lane of b: result = a * b[c],
   one FMUL (scalar by-element form); c must be a compile-time constant.
   NOTE(review): b is converted to float64x2_t here, but ACLE declares
   the non-q vmuld_lane_f64 with a float64x1_t second operand (the
   float64x2_t form is the laneq variant) -- confirm against spec.  */
#define vmuld_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64_t result;                                                \
       __asm__ ("fmul %d0,%d1,%2.d[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   8393 
/* Signed widening multiply-long of the upper half of a by lane c of b:
   result.4s = widen (a[4..7]) * b[c].  One SMULL2; c must be a
   compile-time constant ("i"), hence the macro form.  */
#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   8406 
/* Signed widening multiply-long of the upper half of a by lane c of b:
   result.2d = widen (a[2..3]) * b[c].  One SMULL2; c is constant.  */
#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   8419 
   8420 #define vmull_high_lane_u16(a, b, c)                                    \
   8421   __extension__                                                         \
   8422     ({                                                                  \
   8423        uint16x4_t b_ = (b);                                             \
   8424        uint16x8_t a_ = (a);                                             \
   8425        uint32x4_t result;                                               \
   8426        __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
   8427                 : "=w"(result)                                          \
   8428                 : "w"(a_), "x"(b_), "i"(c)                              \
   8429                 : /* No clobbers */);                                   \
   8430        result;                                                          \
   8431      })
   8432 
   8433 #define vmull_high_lane_u32(a, b, c)                                    \
   8434   __extension__                                                         \
   8435     ({                                                                  \
   8436        uint32x2_t b_ = (b);                                             \
   8437        uint32x4_t a_ = (a);                                             \
   8438        uint64x2_t result;                                               \
   8439        __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
   8440                 : "=w"(result)                                          \
   8441                 : "w"(a_), "w"(b_), "i"(c)                              \
   8442                 : /* No clobbers */);                                   \
   8443        result;                                                          \
   8444      })
   8445 
   8446 #define vmull_high_laneq_s16(a, b, c)                                   \
   8447   __extension__                                                         \
   8448     ({                                                                  \
   8449        int16x8_t b_ = (b);                                              \
   8450        int16x8_t a_ = (a);                                              \
   8451        int32x4_t result;                                                \
   8452        __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
   8453                 : "=w"(result)                                          \
   8454                 : "w"(a_), "x"(b_), "i"(c)                              \
   8455                 : /* No clobbers */);                                   \
   8456        result;                                                          \
   8457      })
   8458 
   8459 #define vmull_high_laneq_s32(a, b, c)                                   \
   8460   __extension__                                                         \
   8461     ({                                                                  \
   8462        int32x4_t b_ = (b);                                              \
   8463        int32x4_t a_ = (a);                                              \
   8464        int64x2_t result;                                                \
   8465        __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
   8466                 : "=w"(result)                                          \
   8467                 : "w"(a_), "w"(b_), "i"(c)                              \
   8468                 : /* No clobbers */);                                   \
   8469        result;                                                          \
   8470      })
   8471 
   8472 #define vmull_high_laneq_u16(a, b, c)                                   \
   8473   __extension__                                                         \
   8474     ({                                                                  \
   8475        uint16x8_t b_ = (b);                                             \
   8476        uint16x8_t a_ = (a);                                             \
   8477        uint32x4_t result;                                               \
   8478        __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
   8479                 : "=w"(result)                                          \
   8480                 : "w"(a_), "x"(b_), "i"(c)                              \
   8481                 : /* No clobbers */);                                   \
   8482        result;                                                          \
   8483      })
   8484 
   8485 #define vmull_high_laneq_u32(a, b, c)                                   \
   8486   __extension__                                                         \
   8487     ({                                                                  \
   8488        uint32x4_t b_ = (b);                                             \
   8489        uint32x4_t a_ = (a);                                             \
   8490        uint64x2_t result;                                               \
   8491        __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
   8492                 : "=w"(result)                                          \
   8493                 : "w"(a_), "w"(b_), "i"(c)                              \
   8494                 : /* No clobbers */);                                   \
   8495        result;                                                          \
   8496      })
   8497 
   8498 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   8499 vmull_high_n_s16 (int16x8_t a, int16_t b)
   8500 {
   8501   int32x4_t result;
   8502   __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
   8503            : "=w"(result)
   8504            : "w"(a), "x"(b)
   8505            : /* No clobbers */);
   8506   return result;
   8507 }
   8508 
   8509 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   8510 vmull_high_n_s32 (int32x4_t a, int32_t b)
   8511 {
   8512   int64x2_t result;
   8513   __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
   8514            : "=w"(result)
   8515            : "w"(a), "w"(b)
   8516            : /* No clobbers */);
   8517   return result;
   8518 }
   8519 
   8520 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   8521 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
   8522 {
   8523   uint32x4_t result;
   8524   __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
   8525            : "=w"(result)
   8526            : "w"(a), "x"(b)
   8527            : /* No clobbers */);
   8528   return result;
   8529 }
   8530 
   8531 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   8532 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
   8533 {
   8534   uint64x2_t result;
   8535   __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
   8536            : "=w"(result)
   8537            : "w"(a), "w"(b)
   8538            : /* No clobbers */);
   8539   return result;
   8540 }
   8541 
   8542 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
   8543 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
   8544 {
   8545   poly16x8_t result;
   8546   __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
   8547            : "=w"(result)
   8548            : "w"(a), "w"(b)
   8549            : /* No clobbers */);
   8550   return result;
   8551 }
   8552 
   8553 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   8554 vmull_high_s8 (int8x16_t a, int8x16_t b)
   8555 {
   8556   int16x8_t result;
   8557   __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
   8558            : "=w"(result)
   8559            : "w"(a), "w"(b)
   8560            : /* No clobbers */);
   8561   return result;
   8562 }
   8563 
   8564 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   8565 vmull_high_s16 (int16x8_t a, int16x8_t b)
   8566 {
   8567   int32x4_t result;
   8568   __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
   8569            : "=w"(result)
   8570            : "w"(a), "w"(b)
   8571            : /* No clobbers */);
   8572   return result;
   8573 }
   8574 
   8575 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   8576 vmull_high_s32 (int32x4_t a, int32x4_t b)
   8577 {
   8578   int64x2_t result;
   8579   __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
   8580            : "=w"(result)
   8581            : "w"(a), "w"(b)
   8582            : /* No clobbers */);
   8583   return result;
   8584 }
   8585 
   8586 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   8587 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
   8588 {
   8589   uint16x8_t result;
   8590   __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
   8591            : "=w"(result)
   8592            : "w"(a), "w"(b)
   8593            : /* No clobbers */);
   8594   return result;
   8595 }
   8596 
   8597 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   8598 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
   8599 {
   8600   uint32x4_t result;
   8601   __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
   8602            : "=w"(result)
   8603            : "w"(a), "w"(b)
   8604            : /* No clobbers */);
   8605   return result;
   8606 }
   8607 
   8608 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   8609 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
   8610 {
   8611   uint64x2_t result;
   8612   __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
   8613            : "=w"(result)
   8614            : "w"(a), "w"(b)
   8615            : /* No clobbers */);
   8616   return result;
   8617 }
   8618 
   8619 #define vmull_lane_s16(a, b, c)                                         \
   8620   __extension__                                                         \
   8621     ({                                                                  \
   8622        int16x4_t b_ = (b);                                              \
   8623        int16x4_t a_ = (a);                                              \
   8624        int32x4_t result;                                                \
   8625        __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
   8626                 : "=w"(result)                                          \
   8627                 : "w"(a_), "x"(b_), "i"(c)                              \
   8628                 : /* No clobbers */);                                   \
   8629        result;                                                          \
   8630      })
   8631 
   8632 #define vmull_lane_s32(a, b, c)                                         \
   8633   __extension__                                                         \
   8634     ({                                                                  \
   8635        int32x2_t b_ = (b);                                              \
   8636        int32x2_t a_ = (a);                                              \
   8637        int64x2_t result;                                                \
   8638        __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
   8639                 : "=w"(result)                                          \
   8640                 : "w"(a_), "w"(b_), "i"(c)                              \
   8641                 : /* No clobbers */);                                   \
   8642        result;                                                          \
   8643      })
   8644 
   8645 #define vmull_lane_u16(a, b, c)                                         \
   8646   __extension__                                                         \
   8647     ({                                                                  \
   8648        uint16x4_t b_ = (b);                                             \
   8649        uint16x4_t a_ = (a);                                             \
   8650        uint32x4_t result;                                               \
   8651        __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
   8652                 : "=w"(result)                                          \
   8653                 : "w"(a_), "x"(b_), "i"(c)                              \
   8654                 : /* No clobbers */);                                   \
   8655        result;                                                          \
   8656      })
   8657 
   8658 #define vmull_lane_u32(a, b, c)                                         \
   8659   __extension__                                                         \
   8660     ({                                                                  \
   8661        uint32x2_t b_ = (b);                                             \
   8662        uint32x2_t a_ = (a);                                             \
   8663        uint64x2_t result;                                               \
   8664        __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
   8665                 : "=w"(result)                                          \
   8666                 : "w"(a_), "w"(b_), "i"(c)                              \
   8667                 : /* No clobbers */);                                   \
   8668        result;                                                          \
   8669      })
   8670 
   8671 #define vmull_laneq_s16(a, b, c)                                        \
   8672   __extension__                                                         \
   8673     ({                                                                  \
   8674        int16x8_t b_ = (b);                                              \
   8675        int16x4_t a_ = (a);                                              \
   8676        int32x4_t result;                                                \
   8677        __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
   8678                 : "=w"(result)                                          \
   8679                 : "w"(a_), "x"(b_), "i"(c)                              \
   8680                 : /* No clobbers */);                                   \
   8681        result;                                                          \
   8682      })
   8683 
   8684 #define vmull_laneq_s32(a, b, c)                                        \
   8685   __extension__                                                         \
   8686     ({                                                                  \
   8687        int32x4_t b_ = (b);                                              \
   8688        int32x2_t a_ = (a);                                              \
   8689        int64x2_t result;                                                \
   8690        __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
   8691                 : "=w"(result)                                          \
   8692                 : "w"(a_), "w"(b_), "i"(c)                              \
   8693                 : /* No clobbers */);                                   \
   8694        result;                                                          \
   8695      })
   8696 
   8697 #define vmull_laneq_u16(a, b, c)                                        \
   8698   __extension__                                                         \
   8699     ({                                                                  \
   8700        uint16x8_t b_ = (b);                                             \
   8701        uint16x4_t a_ = (a);                                             \
   8702        uint32x4_t result;                                               \
   8703        __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
   8704                 : "=w"(result)                                          \
   8705                 : "w"(a_), "x"(b_), "i"(c)                              \
   8706                 : /* No clobbers */);                                   \
   8707        result;                                                          \
   8708      })
   8709 
   8710 #define vmull_laneq_u32(a, b, c)                                        \
   8711   __extension__                                                         \
   8712     ({                                                                  \
   8713        uint32x4_t b_ = (b);                                             \
   8714        uint32x2_t a_ = (a);                                             \
   8715        uint64x2_t result;                                               \
   8716        __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
   8717                 : "=w"(result)                                          \
   8718                 : "w"(a_), "w"(b_), "i"(c)                              \
   8719                 : /* No clobbers */);                                   \
   8720        result;                                                          \
   8721      })
   8722 
   8723 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   8724 vmull_n_s16 (int16x4_t a, int16_t b)
   8725 {
   8726   int32x4_t result;
   8727   __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
   8728            : "=w"(result)
   8729            : "w"(a), "x"(b)
   8730            : /* No clobbers */);
   8731   return result;
   8732 }
   8733 
   8734 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   8735 vmull_n_s32 (int32x2_t a, int32_t b)
   8736 {
   8737   int64x2_t result;
   8738   __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
   8739            : "=w"(result)
   8740            : "w"(a), "w"(b)
   8741            : /* No clobbers */);
   8742   return result;
   8743 }
   8744 
   8745 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   8746 vmull_n_u16 (uint16x4_t a, uint16_t b)
   8747 {
   8748   uint32x4_t result;
   8749   __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
   8750            : "=w"(result)
   8751            : "w"(a), "x"(b)
   8752            : /* No clobbers */);
   8753   return result;
   8754 }
   8755 
   8756 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   8757 vmull_n_u32 (uint32x2_t a, uint32_t b)
   8758 {
   8759   uint64x2_t result;
   8760   __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
   8761            : "=w"(result)
   8762            : "w"(a), "w"(b)
   8763            : /* No clobbers */);
   8764   return result;
   8765 }
   8766 
   8767 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
   8768 vmull_p8 (poly8x8_t a, poly8x8_t b)
   8769 {
   8770   poly16x8_t result;
   8771   __asm__ ("pmull %0.8h, %1.8b, %2.8b"
   8772            : "=w"(result)
   8773            : "w"(a), "w"(b)
   8774            : /* No clobbers */);
   8775   return result;
   8776 }
   8777 
   8778 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   8779 vmull_s8 (int8x8_t a, int8x8_t b)
   8780 {
   8781   int16x8_t result;
   8782   __asm__ ("smull %0.8h, %1.8b, %2.8b"
   8783            : "=w"(result)
   8784            : "w"(a), "w"(b)
   8785            : /* No clobbers */);
   8786   return result;
   8787 }
   8788 
   8789 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   8790 vmull_s16 (int16x4_t a, int16x4_t b)
   8791 {
   8792   int32x4_t result;
   8793   __asm__ ("smull %0.4s, %1.4h, %2.4h"
   8794            : "=w"(result)
   8795            : "w"(a), "w"(b)
   8796            : /* No clobbers */);
   8797   return result;
   8798 }
   8799 
   8800 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   8801 vmull_s32 (int32x2_t a, int32x2_t b)
   8802 {
   8803   int64x2_t result;
   8804   __asm__ ("smull %0.2d, %1.2s, %2.2s"
   8805            : "=w"(result)
   8806            : "w"(a), "w"(b)
   8807            : /* No clobbers */);
   8808   return result;
   8809 }
   8810 
   8811 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   8812 vmull_u8 (uint8x8_t a, uint8x8_t b)
   8813 {
   8814   uint16x8_t result;
   8815   __asm__ ("umull %0.8h, %1.8b, %2.8b"
   8816            : "=w"(result)
   8817            : "w"(a), "w"(b)
   8818            : /* No clobbers */);
   8819   return result;
   8820 }
   8821 
   8822 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   8823 vmull_u16 (uint16x4_t a, uint16x4_t b)
   8824 {
   8825   uint32x4_t result;
   8826   __asm__ ("umull %0.4s, %1.4h, %2.4h"
   8827            : "=w"(result)
   8828            : "w"(a), "w"(b)
   8829            : /* No clobbers */);
   8830   return result;
   8831 }
   8832 
   8833 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   8834 vmull_u32 (uint32x2_t a, uint32x2_t b)
   8835 {
   8836   uint64x2_t result;
   8837   __asm__ ("umull %0.2d, %1.2s, %2.2s"
   8838            : "=w"(result)
   8839            : "w"(a), "w"(b)
   8840            : /* No clobbers */);
   8841   return result;
   8842 }
   8843 
   8844 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
   8845 vmulq_n_f32 (float32x4_t a, float32_t b)
   8846 {
   8847   float32x4_t result;
   8848   __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
   8849            : "=w"(result)
   8850            : "w"(a), "w"(b)
   8851            : /* No clobbers */);
   8852   return result;
   8853 }
   8854 
   8855 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
   8856 vmulq_n_f64 (float64x2_t a, float64_t b)
   8857 {
   8858   float64x2_t result;
   8859   __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
   8860            : "=w"(result)
   8861            : "w"(a), "w"(b)
   8862            : /* No clobbers */);
   8863   return result;
   8864 }
   8865 
   8866 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   8867 vmulq_n_s16 (int16x8_t a, int16_t b)
   8868 {
   8869   int16x8_t result;
   8870   __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
   8871            : "=w"(result)
   8872            : "w"(a), "x"(b)
   8873            : /* No clobbers */);
   8874   return result;
   8875 }
   8876 
   8877 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   8878 vmulq_n_s32 (int32x4_t a, int32_t b)
   8879 {
   8880   int32x4_t result;
   8881   __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
   8882            : "=w"(result)
   8883            : "w"(a), "w"(b)
   8884            : /* No clobbers */);
   8885   return result;
   8886 }
   8887 
   8888 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   8889 vmulq_n_u16 (uint16x8_t a, uint16_t b)
   8890 {
   8891   uint16x8_t result;
   8892   __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
   8893            : "=w"(result)
   8894            : "w"(a), "x"(b)
   8895            : /* No clobbers */);
   8896   return result;
   8897 }
   8898 
   8899 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   8900 vmulq_n_u32 (uint32x4_t a, uint32_t b)
   8901 {
   8902   uint32x4_t result;
   8903   __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
   8904            : "=w"(result)
   8905            : "w"(a), "w"(b)
   8906            : /* No clobbers */);
   8907   return result;
   8908 }
   8909 
   8910 #define vmuls_lane_f32(a, b, c)                                         \
   8911   __extension__                                                         \
   8912     ({                                                                  \
   8913        float32x4_t b_ = (b);                                            \
   8914        float32_t a_ = (a);                                              \
   8915        float32_t result;                                                \
   8916        __asm__ ("fmul %s0,%s1,%2.s[%3]"                                 \
   8917                 : "=w"(result)                                          \
   8918                 : "w"(a_), "w"(b_), "i"(c)                              \
   8919                 : /* No clobbers */);                                   \
   8920        result;                                                          \
   8921      })
   8922 
   8923 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
   8924 vmulx_f32 (float32x2_t a, float32x2_t b)
   8925 {
   8926   float32x2_t result;
   8927   __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
   8928            : "=w"(result)
   8929            : "w"(a), "w"(b)
   8930            : /* No clobbers */);
   8931   return result;
   8932 }
   8933 
   8934 #define vmulx_lane_f32(a, b, c)                                         \
   8935   __extension__                                                         \
   8936     ({                                                                  \
   8937        float32x4_t b_ = (b);                                            \
   8938        float32x2_t a_ = (a);                                            \
   8939        float32x2_t result;                                              \
   8940        __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]"                            \
   8941                 : "=w"(result)                                          \
   8942                 : "w"(a_), "w"(b_), "i"(c)                              \
   8943                 : /* No clobbers */);                                   \
   8944        result;                                                          \
   8945      })
   8946 
   8947 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
   8948 vmulxd_f64 (float64_t a, float64_t b)
   8949 {
   8950   float64_t result;
   8951   __asm__ ("fmulx %d0, %d1, %d2"
   8952            : "=w"(result)
   8953            : "w"(a), "w"(b)
   8954            : /* No clobbers */);
   8955   return result;
   8956 }
   8957 
   8958 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
   8959 vmulxq_f32 (float32x4_t a, float32x4_t b)
   8960 {
   8961   float32x4_t result;
   8962   __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
   8963            : "=w"(result)
   8964            : "w"(a), "w"(b)
   8965            : /* No clobbers */);
   8966   return result;
   8967 }
   8968 
   8969 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
   8970 vmulxq_f64 (float64x2_t a, float64x2_t b)
   8971 {
   8972   float64x2_t result;
   8973   __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
   8974            : "=w"(result)
   8975            : "w"(a), "w"(b)
   8976            : /* No clobbers */);
   8977   return result;
   8978 }
   8979 
   8980 #define vmulxq_lane_f32(a, b, c)                                        \
   8981   __extension__                                                         \
   8982     ({                                                                  \
   8983        float32x4_t b_ = (b);                                            \
   8984        float32x4_t a_ = (a);                                            \
   8985        float32x4_t result;                                              \
   8986        __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]"                            \
   8987                 : "=w"(result)                                          \
   8988                 : "w"(a_), "w"(b_), "i"(c)                              \
   8989                 : /* No clobbers */);                                   \
   8990        result;                                                          \
   8991      })
   8992 
   8993 #define vmulxq_lane_f64(a, b, c)                                        \
   8994   __extension__                                                         \
   8995     ({                                                                  \
   8996        float64x2_t b_ = (b);                                            \
   8997        float64x2_t a_ = (a);                                            \
   8998        float64x2_t result;                                              \
   8999        __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]"                            \
   9000                 : "=w"(result)                                          \
   9001                 : "w"(a_), "w"(b_), "i"(c)                              \
   9002                 : /* No clobbers */);                                   \
   9003        result;                                                          \
   9004      })
   9005 
   9006 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
   9007 vmulxs_f32 (float32_t a, float32_t b)
   9008 {
   9009   float32_t result;
   9010   __asm__ ("fmulx %s0, %s1, %s2"
   9011            : "=w"(result)
   9012            : "w"(a), "w"(b)
   9013            : /* No clobbers */);
   9014   return result;
   9015 }
   9016 
   9017 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   9018 vmvn_p8 (poly8x8_t a)
   9019 {
   9020   poly8x8_t result;
   9021   __asm__ ("mvn %0.8b,%1.8b"
   9022            : "=w"(result)
   9023            : "w"(a)
   9024            : /* No clobbers */);
   9025   return result;
   9026 }
   9027 
   9028 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   9029 vmvn_s8 (int8x8_t a)
   9030 {
   9031   int8x8_t result;
   9032   __asm__ ("mvn %0.8b,%1.8b"
   9033            : "=w"(result)
   9034            : "w"(a)
   9035            : /* No clobbers */);
   9036   return result;
   9037 }
   9038 
   9039 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   9040 vmvn_s16 (int16x4_t a)
   9041 {
   9042   int16x4_t result;
   9043   __asm__ ("mvn %0.8b,%1.8b"
   9044            : "=w"(result)
   9045            : "w"(a)
   9046            : /* No clobbers */);
   9047   return result;
   9048 }
   9049 
   9050 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   9051 vmvn_s32 (int32x2_t a)
   9052 {
   9053   int32x2_t result;
   9054   __asm__ ("mvn %0.8b,%1.8b"
   9055            : "=w"(result)
   9056            : "w"(a)
   9057            : /* No clobbers */);
   9058   return result;
   9059 }
   9060 
   9061 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   9062 vmvn_u8 (uint8x8_t a)
   9063 {
   9064   uint8x8_t result;
   9065   __asm__ ("mvn %0.8b,%1.8b"
   9066            : "=w"(result)
   9067            : "w"(a)
   9068            : /* No clobbers */);
   9069   return result;
   9070 }
   9071 
   9072 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   9073 vmvn_u16 (uint16x4_t a)
   9074 {
   9075   uint16x4_t result;
   9076   __asm__ ("mvn %0.8b,%1.8b"
   9077            : "=w"(result)
   9078            : "w"(a)
   9079            : /* No clobbers */);
   9080   return result;
   9081 }
   9082 
   9083 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   9084 vmvn_u32 (uint32x2_t a)
   9085 {
   9086   uint32x2_t result;
   9087   __asm__ ("mvn %0.8b,%1.8b"
   9088            : "=w"(result)
   9089            : "w"(a)
   9090            : /* No clobbers */);
   9091   return result;
   9092 }
   9093 
/* vmvnq_* (128-bit "q" forms): lane-wise bitwise complement via MVN on
   the full 128-bit register (.16b covers all element widths, since MVN
   is byte-wise).  Same "=w"/"w" register constraints as the d forms.  */

/* Bitwise NOT of each of 16 poly8 lanes.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmvnq_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Bitwise NOT of each of 16 int8 lanes.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmvnq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Bitwise NOT of each of 8 int16 lanes.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmvnq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Bitwise NOT of each of 4 int32 lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmvnq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Bitwise NOT of each of 16 uint8 lanes.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmvnq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Bitwise NOT of each of 8 uint16 lanes.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmvnq_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Bitwise NOT of each of 4 uint32 lanes.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmvnq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   9170 
   9171 
/* vpadal_* (64-bit forms): pairwise add-accumulate long.  SADALP/UADALP
   sums adjacent pairs of elements of b (widening each pair's sum to
   double width) and adds that into the accumulator a.  The "0" input
   constraint ties a to operand 0 so the accumulator is read and written
   in the same register, which is why the asm template only names %0 and
   %2.  */

/* a[i] += b[2*i] + b[2*i+1], signed 8 -> 16 bit.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadal_s8 (int16x4_t a, int8x8_t b)
{
  int16x4_t result;
  __asm__ ("sadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a[i] += b[2*i] + b[2*i+1], signed 16 -> 32 bit.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadal_s16 (int32x2_t a, int16x4_t b)
{
  int32x2_t result;
  __asm__ ("sadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a += b[0] + b[1], signed 32 -> 64 bit.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpadal_s32 (int64x1_t a, int32x2_t b)
{
  int64x1_t result;
  __asm__ ("sadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a[i] += b[2*i] + b[2*i+1], unsigned 8 -> 16 bit.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadal_u8 (uint16x4_t a, uint8x8_t b)
{
  uint16x4_t result;
  __asm__ ("uadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a[i] += b[2*i] + b[2*i+1], unsigned 16 -> 32 bit.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadal_u16 (uint32x2_t a, uint16x4_t b)
{
  uint32x2_t result;
  __asm__ ("uadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a += b[0] + b[1], unsigned 32 -> 64 bit.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpadal_u32 (uint64x1_t a, uint32x2_t b)
{
  uint64x1_t result;
  __asm__ ("uadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   9237 
/* vpadalq_* (128-bit forms): pairwise add-accumulate long over the full
   q register.  As with the d forms, the "0" constraint ties accumulator
   a to output operand 0, so the template uses %0 and %2 only.  */

/* a[i] += b[2*i] + b[2*i+1], signed 8 -> 16 bit, 8 lanes.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpadalq_s8 (int16x8_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a[i] += b[2*i] + b[2*i+1], signed 16 -> 32 bit, 4 lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpadalq_s16 (int32x4_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a[i] += b[2*i] + b[2*i+1], signed 32 -> 64 bit, 2 lanes.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpadalq_s32 (int64x2_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a[i] += b[2*i] + b[2*i+1], unsigned 8 -> 16 bit, 8 lanes.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpadalq_u8 (uint16x8_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a[i] += b[2*i] + b[2*i+1], unsigned 16 -> 32 bit, 4 lanes.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpadalq_u16 (uint32x4_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a[i] += b[2*i] + b[2*i+1], unsigned 32 -> 64 bit, 2 lanes.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpadalq_u32 (uint64x2_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   9303 
/* vpadd_f32: floating-point pairwise add.  FADDP concatenates a and b,
   then sums adjacent pairs: result = { a[0]+a[1], b[0]+b[1] }.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpadd_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("faddp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   9314 
   9315 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   9316 vpadd_s8 (int8x8_t __a, int8x8_t __b)
   9317 {
   9318   return __builtin_aarch64_addpv8qi (__a, __b);
   9319 }
   9320 
   9321 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   9322 vpadd_s16 (int16x4_t __a, int16x4_t __b)
   9323 {
   9324   return __builtin_aarch64_addpv4hi (__a, __b);
   9325 }
   9326 
   9327 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   9328 vpadd_s32 (int32x2_t __a, int32x2_t __b)
   9329 {
   9330   return __builtin_aarch64_addpv2si (__a, __b);
   9331 }
   9332 
   9333 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   9334 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
   9335 {
   9336   return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
   9337 						 (int8x8_t) __b);
   9338 }
   9339 
   9340 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   9341 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
   9342 {
   9343   return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
   9344 						  (int16x4_t) __b);
   9345 }
   9346 
   9347 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   9348 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
   9349 {
   9350   return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
   9351 						  (int32x2_t) __b);
   9352 }
   9353 
/* vpaddd_f64: scalar pairwise-add reduction; returns a[0] + a[1].
   "%d0" selects the scalar double view of the result register.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpaddd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("faddp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   9364 
/* vpaddl_* (64-bit forms): pairwise add long.  SADDLP/UADDLP sums each
   adjacent pair of elements of a, widening the sum to double width, so
   the result vector has half as many lanes.  */

/* result[i] = a[2*i] + a[2*i+1], signed 8 -> 16 bit.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpaddl_s8 (int8x8_t a)
{
  int16x4_t result;
  __asm__ ("saddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* result[i] = a[2*i] + a[2*i+1], signed 16 -> 32 bit.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpaddl_s16 (int16x4_t a)
{
  int32x2_t result;
  __asm__ ("saddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* result = a[0] + a[1], signed 32 -> 64 bit.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpaddl_s32 (int32x2_t a)
{
  int64x1_t result;
  __asm__ ("saddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* result[i] = a[2*i] + a[2*i+1], unsigned 8 -> 16 bit.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpaddl_u8 (uint8x8_t a)
{
  uint16x4_t result;
  __asm__ ("uaddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* result[i] = a[2*i] + a[2*i+1], unsigned 16 -> 32 bit.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpaddl_u16 (uint16x4_t a)
{
  uint32x2_t result;
  __asm__ ("uaddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* result = a[0] + a[1], unsigned 32 -> 64 bit.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpaddl_u32 (uint32x2_t a)
{
  uint64x1_t result;
  __asm__ ("uaddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   9430 
/* vpaddlq_* (128-bit forms): pairwise add long over the full q
   register; each adjacent pair of lanes of a is summed into one
   double-width lane.  */

/* result[i] = a[2*i] + a[2*i+1], signed 8 -> 16 bit, 8 lanes.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddlq_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("saddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* result[i] = a[2*i] + a[2*i+1], signed 16 -> 32 bit, 4 lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddlq_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("saddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* result[i] = a[2*i] + a[2*i+1], signed 32 -> 64 bit, 2 lanes.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddlq_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("saddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* result[i] = a[2*i] + a[2*i+1], unsigned 8 -> 16 bit, 8 lanes.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddlq_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("uaddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* result[i] = a[2*i] + a[2*i+1], unsigned 16 -> 32 bit, 4 lanes.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddlq_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("uaddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* result[i] = a[2*i] + a[2*i+1], unsigned 32 -> 64 bit, 2 lanes.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddlq_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("uaddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   9496 
/* vpaddq_* (128-bit forms): pairwise add.  FADDP/ADDP concatenates a
   and b and sums adjacent pairs, so the low half of the result holds
   the pair-sums of a and the high half those of b.  ADDP is used for
   every integer width (sign-agnostic two's-complement addition).  */

/* Pairwise add, 4 x float32.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpaddq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("faddp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise add, 2 x float64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpaddq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("faddp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise add, 16 x int8.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpaddq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise add, 8 x int16.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise add, 4 x int32.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise add, 2 x int64.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddq_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise add, 16 x uint8.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpaddq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise add, 8 x uint16.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise add, 4 x uint32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise add, 2 x uint64.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddq_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   9606 
/* vpadds_f32: scalar pairwise-add reduction; returns a[0] + a[1].
   "%s0" selects the scalar single-precision view of the result.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpadds_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("faddp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   9617 
/* vpmax_* (64-bit forms): pairwise maximum.  The two inputs are
   concatenated and the maximum of each adjacent pair is taken; the low
   half of the result comes from a, the high half from b.  FMAXP for
   float, SMAXP/UMAXP for signed/unsigned integers.  */

/* Pairwise max, 2 x float32.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmax_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 8 x int8.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmax_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 4 x int16.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmax_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 2 x int32.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmax_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 8 x uint8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmax_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 4 x uint16.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmax_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 2 x uint32.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmax_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   9694 
/* vpmaxnm*: pairwise maxNum (FMAXNMP) — IEEE 754-2008 maxNum
   semantics, i.e. if one element of a pair is a quiet NaN the other
   (numeric) element is returned.  The *qd/*s forms reduce a whole
   vector to one scalar.  */

/* Pairwise maxNum, 2 x float32.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmaxnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise maxNum, 4 x float32.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise maxNum, 2 x float64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction: maxNum (a[0], a[1]) of a float64x2.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction: maxNum (a[0], a[1]) of a float32x2.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   9749 
/* vpmaxq_* (128-bit forms): pairwise maximum over the full q register;
   low half of the result comes from a's pairs, high half from b's.  */

/* Pairwise max, 4 x float32.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 2 x float64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 16 x int8.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpmaxq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 8 x int16.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpmaxq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 4 x int32.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpmaxq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 16 x uint8.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 8 x uint16.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise max, 4 x uint32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   9837 
/* vpmaxqd_f64: scalar pairwise-max reduction of a float64x2;
   returns max (a[0], a[1]) with FMAXP semantics (NaN propagates).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vpmaxs_f32: scalar pairwise-max reduction of a float32x2;
   returns max (a[0], a[1]) with FMAXP semantics (NaN propagates).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxs_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   9859 
/* vpmin_* (64-bit forms): pairwise minimum, mirror of the vpmax_*
   family.  FMINP for float, SMINP/UMINP for signed/unsigned
   integers.  */

/* Pairwise min, 2 x float32.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmin_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise min, 8 x int8.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmin_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("sminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise min, 4 x int16.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmin_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("sminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise min, 2 x int32.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmin_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("sminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise min, 8 x uint8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmin_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise min, 4 x uint16.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmin_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise min, 2 x uint32.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmin_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   9936 
/* vpminnm*: pairwise minNum (FMINNMP) — IEEE 754-2008 minNum
   semantics, i.e. if one element of a pair is a quiet NaN the other
   (numeric) element is returned.  The *qd/*s forms reduce a whole
   vector to one scalar.  */

/* Pairwise minNum, 2 x float32.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpminnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise minNum, 4 x float32.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise minNum, 2 x float64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction: minNum (a[0], a[1]) of a float64x2.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction: minNum (a[0], a[1]) of a float32x2.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpminnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   9991 
   9992 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
   9993 vpminq_f32 (float32x4_t a, float32x4_t b)
   9994 {
   9995   float32x4_t result;
   9996   __asm__ ("fminp %0.4s, %1.4s, %2.4s"
   9997            : "=w"(result)
   9998            : "w"(a), "w"(b)
   9999            : /* No clobbers */);
   10000   return result;
   10001 }
   10002 
   10003 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
   10004 vpminq_f64 (float64x2_t a, float64x2_t b)
   10005 {
   10006   float64x2_t result;
   10007   __asm__ ("fminp %0.2d, %1.2d, %2.2d"
   10008            : "=w"(result)
   10009            : "w"(a), "w"(b)
   10010            : /* No clobbers */);
   10011   return result;
   10012 }
   10013 
   10014 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
   10015 vpminq_s8 (int8x16_t a, int8x16_t b)
   10016 {
   10017   int8x16_t result;
   10018   __asm__ ("sminp %0.16b, %1.16b, %2.16b"
   10019            : "=w"(result)
   10020            : "w"(a), "w"(b)
   10021            : /* No clobbers */);
   10022   return result;
   10023 }
   10024 
   10025 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   10026 vpminq_s16 (int16x8_t a, int16x8_t b)
   10027 {
   10028   int16x8_t result;
   10029   __asm__ ("sminp %0.8h, %1.8h, %2.8h"
   10030            : "=w"(result)
   10031            : "w"(a), "w"(b)
   10032            : /* No clobbers */);
   10033   return result;
   10034 }
   10035 
   10036 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   10037 vpminq_s32 (int32x4_t a, int32x4_t b)
   10038 {
   10039   int32x4_t result;
   10040   __asm__ ("sminp %0.4s, %1.4s, %2.4s"
   10041            : "=w"(result)
   10042            : "w"(a), "w"(b)
   10043            : /* No clobbers */);
   10044   return result;
   10045 }
   10046 
   10047 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   10048 vpminq_u8 (uint8x16_t a, uint8x16_t b)
   10049 {
   10050   uint8x16_t result;
   10051   __asm__ ("uminp %0.16b, %1.16b, %2.16b"
   10052            : "=w"(result)
   10053            : "w"(a), "w"(b)
   10054            : /* No clobbers */);
   10055   return result;
   10056 }
   10057 
/* vpminq_u16: unsigned pairwise minimum (UMINP) of 16-bit lanes drawn
   from a (low result half) then b (high result half).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpminq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   10068 
/* vpminq_u32: unsigned pairwise minimum (UMINP) of 32-bit lanes drawn
   from a (low result half) then b (high result half).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpminq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   10079 
/* vpminqd_f64: reduce the two double-precision lanes of a to their
   minimum using the scalar form of FMINP.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10090 
/* vpmins_f32: reduce the two single-precision lanes of a to their
   minimum using the scalar form of FMINP.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmins_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10101 
/* vqdmulh_n_s16: saturating doubling multiply-high by scalar (SQDMULH
   by element): each lane of a is multiplied by b, doubled, and the
   high half kept, with saturation.  The "x" constraint restricts b to
   V0-V15, as required by the 16-bit lane-indexed multiply encoding.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}
   10112 
/* vqdmulh_n_s32: saturating doubling multiply-high by scalar (SQDMULH
   by element) on 32-bit lanes.  The 32-bit indexed form can address
   any SIMD register, so the plain "w" constraint is used for b.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   10123 
/* vqdmulhq_n_s16: 128-bit variant of vqdmulh_n_s16 (SQDMULH by
   element).  "x" restricts b to V0-V15 for the 16-bit indexed form.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}
   10134 
/* vqdmulhq_n_s32: 128-bit variant of vqdmulh_n_s32 (SQDMULH by
   element); 32-bit indexed form permits any SIMD register ("w").  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   10145 
/* vqmovn_high_s16: signed saturating narrow of b into the high half of
   the result; the low half is a.  result is seeded with a in its low
   half (upper half zero) and SQXTN2 overwrites only the upper half,
   hence the read-write "+w" constraint.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   10156 
/* vqmovn_high_s32: signed saturating narrow of b into the high half of
   the result; low half is a.  SQXTN2 writes only the upper half of the
   pre-seeded result ("+w").  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   10167 
/* vqmovn_high_s64: signed saturating narrow of b into the high half of
   the result; low half is a.  SQXTN2 writes only the upper half of the
   pre-seeded result ("+w").  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   10178 
/* vqmovn_high_u16: unsigned saturating narrow of b into the high half
   of the result; low half is a.  UQXTN2 writes only the upper half of
   the pre-seeded result ("+w").  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   10189 
/* vqmovn_high_u32: unsigned saturating narrow of b into the high half
   of the result; low half is a.  UQXTN2 writes only the upper half of
   the pre-seeded result ("+w").  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   10200 
/* vqmovn_high_u64: unsigned saturating narrow of b into the high half
   of the result; low half is a.  UQXTN2 writes only the upper half of
   the pre-seeded result ("+w").  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   10211 
/* vqmovun_high_s16: signed-to-unsigned saturating narrow (SQXTUN2) of
   b into the high half of the result; low half is a.  Writes only the
   upper half of the pre-seeded result ("+w").  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   10222 
/* vqmovun_high_s32: signed-to-unsigned saturating narrow (SQXTUN2) of
   b into the high half of the result; low half is a.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   10233 
/* vqmovun_high_s64: signed-to-unsigned saturating narrow (SQXTUN2) of
   b into the high half of the result; low half is a.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
   10244 
/* vqrdmulh_n_s16: saturating rounding doubling multiply-high by scalar
   (SQRDMULH by element).  "x" restricts b to V0-V15, as required by
   the 16-bit lane-indexed multiply encoding.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}
   10255 
/* vqrdmulh_n_s32: saturating rounding doubling multiply-high by scalar
   (SQRDMULH by element); 32-bit indexed form permits any register.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   10266 
/* vqrdmulhq_n_s16: 128-bit variant of vqrdmulh_n_s16 (SQRDMULH by
   element).  "x" restricts b to V0-V15 for the 16-bit indexed form.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}
   10277 
/* vqrdmulhq_n_s32: 128-bit variant of vqrdmulh_n_s32 (SQRDMULH by
   element); 32-bit indexed form permits any SIMD register ("w").  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   10288 
/* vqrshrn_high_n_s16: signed saturating rounded shift-right-narrow of b
   by immediate c (SQRSHRN2) into the high half of the result; low half
   is a.  A macro (not a function) because c must be an assemble-time
   constant, enforced by the "i" constraint.  */
#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10303 
/* vqrshrn_high_n_s32: signed saturating rounded shift-right-narrow of b
   by immediate c (SQRSHRN2) into the high half; low half is a.  Macro
   form so c can be passed as an "i" (immediate) operand.  */
#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10318 
/* vqrshrn_high_n_s64: signed saturating rounded shift-right-narrow of b
   by immediate c (SQRSHRN2) into the high half; low half is a.  Macro
   form so c can be passed as an "i" (immediate) operand.  */
#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10333 
/* vqrshrn_high_n_u16: unsigned saturating rounded shift-right-narrow of
   b by immediate c (UQRSHRN2) into the high half; low half is a.  Macro
   form so c can be passed as an "i" (immediate) operand.  */
#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10348 
/* vqrshrn_high_n_u32: unsigned saturating rounded shift-right-narrow of
   b by immediate c (UQRSHRN2) into the high half; low half is a.  */
#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10363 
/* vqrshrn_high_n_u64: unsigned saturating rounded shift-right-narrow of
   b by immediate c (UQRSHRN2) into the high half; low half is a.  */
#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10378 
/* vqrshrun_high_n_s16: signed-to-unsigned saturating rounded
   shift-right-narrow of b by immediate c (SQRSHRUN2) into the high
   half; low half is a.  Macro form for the "i" immediate operand.  */
#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10393 
/* vqrshrun_high_n_s32: signed-to-unsigned saturating rounded
   shift-right-narrow of b by immediate c (SQRSHRUN2) into the high
   half; low half is a.  */
#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10408 
/* vqrshrun_high_n_s64: signed-to-unsigned saturating rounded
   shift-right-narrow of b by immediate c (SQRSHRUN2) into the high
   half; low half is a.  */
#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10423 
/* vqshrn_high_n_s16: signed saturating shift-right-narrow (truncating,
   no rounding) of b by immediate c (SQSHRN2) into the high half; low
   half is a.  Macro form for the "i" immediate operand.  */
#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10438 
/* vqshrn_high_n_s32: signed saturating shift-right-narrow of b by
   immediate c (SQSHRN2) into the high half; low half is a.  */
#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10453 
/* vqshrn_high_n_s64: signed saturating shift-right-narrow of b by
   immediate c (SQSHRN2) into the high half; low half is a.  */
#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10468 
/* vqshrn_high_n_u16: unsigned saturating shift-right-narrow of b by
   immediate c (UQSHRN2) into the high half; low half is a.  */
#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10483 
/* vqshrn_high_n_u32: unsigned saturating shift-right-narrow of b by
   immediate c (UQSHRN2) into the high half; low half is a.  */
#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10498 
/* vqshrn_high_n_u64: unsigned saturating shift-right-narrow of b by
   immediate c (UQSHRN2) into the high half; low half is a.  */
#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10513 
/* vqshrun_high_n_s16: signed-to-unsigned saturating shift-right-narrow
   of b by immediate c (SQSHRUN2) into the high half; low half is a.
   Macro form for the "i" immediate operand.  */
#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10528 
/* vqshrun_high_n_s32: signed-to-unsigned saturating shift-right-narrow
   of b by immediate c (SQSHRUN2) into the high half; low half is a.  */
#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10543 
/* vqshrun_high_n_s64: signed-to-unsigned saturating shift-right-narrow
   of b by immediate c (SQSHRUN2) into the high half; low half is a.  */
#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   10558 
/* vrbit_s8: reverse the bit order within each byte lane (RBIT).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrbit_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rbit %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10569 
/* vrbit_u8: reverse the bit order within each byte lane (RBIT).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrbit_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rbit %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10580 
/* vrbitq_s8: 128-bit variant of vrbit_s8 — reverse the bit order
   within each byte lane (RBIT).  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrbitq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rbit %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10591 
/* vrbitq_u8: 128-bit variant of vrbit_u8 — reverse the bit order
   within each byte lane (RBIT).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrbitq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rbit %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10602 
/* vrecpe_u32: per-lane unsigned reciprocal estimate (URECPE).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrecpe_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("urecpe %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10613 
/* vrecpeq_u32: 128-bit variant of vrecpe_u32 — per-lane unsigned
   reciprocal estimate (URECPE).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrecpeq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("urecpe %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10624 
/* vrev16_p8: reverse the byte order within each 16-bit halfword
   (REV16), i.e. swap adjacent byte pairs.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev16_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10635 
/* vrev16_s8: reverse the byte order within each 16-bit halfword
   (REV16), i.e. swap adjacent byte pairs.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev16_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10646 
/* vrev16_u8: reverse the byte order within each 16-bit halfword
   (REV16), i.e. swap adjacent byte pairs.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev16_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10657 
/* vrev16q_p8: 128-bit variant of vrev16_p8 — swap adjacent byte pairs
   within each 16-bit halfword (REV16).  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev16q_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("rev16 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10668 
/* vrev16q_s8: 128-bit variant of vrev16_s8 — swap adjacent byte pairs
   within each 16-bit halfword (REV16).  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev16q_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rev16 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10679 
   10680 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   10681 vrev16q_u8 (uint8x16_t a)
   10682 {
   10683   uint8x16_t result;
   10684   __asm__ ("rev16 %0.16b,%1.16b"
   10685            : "=w"(result)
   10686            : "w"(a)
   10687            : /* No clobbers */);
   10688   return result;
   10689 }
   10690 
/* vrev32[q]_* family: reverse the order of the elements inside every
   32-bit word (AArch64 REV32).  The arrangement specifier picks the
   element width: .8b/.16b variants reverse bytes within each word,
   .4h/.8h variants reverse 16-bit halfwords within each word.
   Non-q variants are 64-bit, q variants 128-bit.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev32_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("rev32 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vrev32_p16 (poly16x4_t a)
{
  poly16x4_t result;
  __asm__ ("rev32 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev32_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rev32 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrev32_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("rev32 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev32_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rev32 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrev32_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("rev32 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 128-bit (quadword) variants.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev32q_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("rev32 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vrev32q_p16 (poly16x8_t a)
{
  poly16x8_t result;
  __asm__ ("rev32 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev32q_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rev32 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrev32q_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("rev32 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrev32q_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rev32 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrev32q_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("rev32 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   10822 
/* vrev64[q]_* family: reverse the order of the elements inside every
   64-bit doubleword (AArch64 REV64).  The arrangement specifier picks
   the element width: .8b/.16b reverse bytes, .4h/.8h reverse 16-bit
   halfwords, .2s/.4s reverse 32-bit words (including float32 lanes).
   Non-q variants are 64-bit, q variants 128-bit.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrev64_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("rev64 %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev64_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("rev64 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vrev64_p16 (poly16x4_t a)
{
  poly16x4_t result;
  __asm__ ("rev64 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev64_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rev64 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrev64_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("rev64 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrev64_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("rev64 %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev64_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rev64 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrev64_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("rev64 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrev64_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("rev64 %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 128-bit (quadword) variants.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrev64q_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("rev64 %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev64q_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("rev64 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vrev64q_p16 (poly16x8_t a)
{
  poly16x8_t result;
  __asm__ ("rev64 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev64q_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rev64 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrev64q_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("rev64 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrev64q_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("rev64 %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrev64q_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rev64 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrev64q_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("rev64 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrev64q_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("rev64 %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   11020 
/* vrshrn_high_n_* family: rounding shift right narrow, high half
   (AArch64 RSHRN2).  Each element of the wide vector b is shifted
   right by the immediate c with rounding and narrowed to half width;
   the narrowed results land in the HIGH half of the 128-bit result,
   whose low half is the existing narrow vector a (pre-loaded via
   vcombine_* and tied to the asm with the "+w" read-write operand).
   These are macros rather than inline functions because c must be a
   compile-time constant to satisfy the "i" immediate constraint.  */
#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                            (a_, vcreate_u8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                            (a_, vcreate_u16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                            (a_, vcreate_u32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   11110 
/* vrshrn_n_* family: rounding shift right narrow (AArch64 RSHRN).
   Each element of the wide vector a is shifted right by the immediate
   b with rounding and narrowed to half width, yielding a 64-bit
   result vector.  Macros (not inline functions) so that b satisfies
   the "i" compile-time immediate constraint.  */
#define vrshrn_n_s16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   11182 
/* vrsqrte* family: reciprocal square-root estimate.  Floating-point
   variants emit FRSQRTE, the unsigned variants emit URSQRTE; see the
   ARM ARM for the estimate's precision.  Suffix conventions:
   no suffix = 64-bit vector, q = 128-bit vector, s/d = scalar float32
   / float64 (operands addressed as %s<n> / %d<n> scalar registers).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrsqrte_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("frsqrte %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrsqrte_f64 (float64x1_t a)
{
  float64x1_t result;
  /* float64x1_t holds a single lane, so the scalar form is used.  */
  __asm__ ("frsqrte %d0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsqrte_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("ursqrte %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrsqrted_f64 (float64_t a)
{
  float64_t result;
  __asm__ ("frsqrte %d0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrsqrteq_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("frsqrte %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsqrteq_f64 (float64x2_t a)
{
  float64x2_t result;
  __asm__ ("frsqrte %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsqrteq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("ursqrte %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrsqrtes_f32 (float32_t a)
{
  float32_t result;
  __asm__ ("frsqrte %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
   11270 
/* vrsqrts* family: reciprocal square-root step (AArch64 FRSQRTS),
   the Newton-Raphson refinement step (3 - a*b)/2 used together with
   vrsqrte* to iterate toward 1/sqrt(x).  Suffixes as elsewhere:
   no suffix = 64-bit vector, q = 128-bit, s/d = scalar.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrsqrts_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrsqrtsd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("frsqrts %d0,%d1,%d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrsqrtss_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("frsqrts %s0,%s1,%s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   11325 
/* NOTE(review): "vrsrtsq_f64" is a misspelled duplicate of
   vrsqrtsq_f64 above (the 'q' of "sqrt" is missing) with an
   identical body.  It is not part of the standard NEON intrinsics
   naming scheme; it cannot be renamed here without breaking any
   caller using the typo'd spelling, but new code should call
   vrsqrtsq_f64 instead.  Consider removing in a future release
   (later GCC versions did).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsrtsq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   11336 
/* vrsubhn_high_* family: rounding subtract, narrow to the high half
   of each difference, write into the HIGH half of the 128-bit result
   (AArch64 RSUBHN2).  The low half of the result is the existing
   narrow vector a, pre-loaded via vcombine_* and tied to the asm
   through the "+w" read-write operand; RSUBHN2 then fills the upper
   half from b - c.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   11402 
   11403 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   11404 vrsubhn_s16 (int16x8_t a, int16x8_t b)
   11405 {
   11406   int8x8_t result;
   11407   __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
   11408            : "=w"(result)
   11409            : "w"(a), "w"(b)
   11410            : /* No clobbers */);
   11411   return result;
   11412 }
   11413 
   11414 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   11415 vrsubhn_s32 (int32x4_t a, int32x4_t b)
   11416 {
   11417   int16x4_t result;
   11418   __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
   11419            : "=w"(result)
   11420            : "w"(a), "w"(b)
   11421            : /* No clobbers */);
   11422   return result;
   11423 }
   11424 
   11425 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   11426 vrsubhn_s64 (int64x2_t a, int64x2_t b)
   11427 {
   11428   int32x2_t result;
   11429   __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
   11430            : "=w"(result)
   11431            : "w"(a), "w"(b)
   11432            : /* No clobbers */);
   11433   return result;
   11434 }
   11435 
/* vrsubhn_u16: unsigned rounding subtract and narrow (RSUBHN) —
   rounded high halves of A - B, narrowed from 16 to 8 bits.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
{
  uint8x8_t result;
  __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   11446 
/* vrsubhn_u32: unsigned rounding subtract and narrow (RSUBHN) —
   rounded high halves of A - B, narrowed from 32 to 16 bits.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
{
  uint16x4_t result;
  __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   11457 
/* vrsubhn_u64: unsigned rounding subtract and narrow (RSUBHN) —
   rounded high halves of A - B, narrowed from 64 to 32 bits.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
{
  uint32x2_t result;
  __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   11468 
/* vset_lane_<T> (A, B, C): return 64-bit vector B with scalar A
   inserted at lane C (INS element-from-register).  These must be
   macros, not functions, because C feeds an "i" (compile-time
   immediate) constraint.  The "0"(b_) input ties B to the output
   register so all other lanes are preserved; %w1/%x1 selects the
   32-/64-bit view of the general register holding the scalar.
   NOTE(review): the scalar uses an "r" (general-register) constraint
   even for the FP variants, so FP values transit a GP register.  */
#define vset_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x2_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x1_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t a_ = (a);                                                \
       poly8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t a_ = (a);                                               \
       poly16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t a_ = (a);                                                 \
       int8x8_t result;                                                 \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t a_ = (a);                                                \
       int16x4_t result;                                                \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t a_ = (a);                                                \
       int32x2_t result;                                                \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t a_ = (a);                                                \
       int64x1_t result;                                                \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8_t a_ = (a);                                                \
       uint8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16_t a_ = (a);                                               \
       uint16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32_t a_ = (a);                                               \
       uint32x2_t result;                                               \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64_t a_ = (a);                                               \
       uint64x1_t result;                                               \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   11624 
/* vsetq_lane_<T> (A, B, C): 128-bit ("q") counterparts of the
   vset_lane_<T> macros above — return vector B with scalar A inserted
   at lane C via INS.  Same constraint scheme: "i" forces C to be a
   compile-time constant, "0"(b_) preserves the untouched lanes, and
   %w1/%x1 names the 32-/64-bit general register holding the scalar.  */
#define vsetq_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t a_ = (a);                                                \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16_t a_ = (a);                                               \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8_t a_ = (a);                                                 \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16_t a_ = (a);                                                \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32_t a_ = (a);                                                \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64_t a_ = (a);                                                \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8_t a_ = (a);                                                \
       uint8x16_t result;                                               \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16_t a_ = (a);                                               \
       uint16x8_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32_t a_ = (a);                                               \
       uint32x4_t result;                                               \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64_t a_ = (a);                                               \
       uint64x2_t result;                                               \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   11780 
/* vshrn_high_n_<T> (A, B, C): shift right by immediate C and narrow
   into the high half (SHRN2).  The 128-bit result is seeded with A in
   its low half (upper half zeroed via vcreate), then the insn writes
   the narrowed (B >> C) into the upper half.  Macros because C feeds
   an "i" immediate constraint; "+w" keeps the seeded low half live.  */
#define vshrn_high_n_s16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                            (a_, vcreate_u8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                            (a_, vcreate_u16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                            (a_, vcreate_u32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   11870 
/* vshrn_n_<T> (A, B): shift right by immediate B and narrow to half
   element width (SHRN).  Macros because B feeds an "i" (compile-time
   immediate) constraint.  NOTE(review): unlike the SHRN2 macros above,
   the asm template writes the immediate as bare %2 with no '#' prefix;
   both spellings are accepted by the assembler.  */
#define vshrn_n_s16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
   11918 
   11919 #define vshrn_n_u32(a, b)                                               \
   11920   __extension__                                                         \
   11921     ({                                                                  \
   11922        uint32x4_t a_ = (a);                                             \
   11923        uint16x4_t result;                                               \
   11924        __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
   11925                 : "=w"(result)                                          \
   11926                 : "w"(a_), "i"(b)                                       \
   11927                 : /* No clobbers */);                                   \
   11928        result;                                                          \
   11929      })
   11930 
   11931 #define vshrn_n_u64(a, b)                                               \
   11932   __extension__                                                         \
   11933     ({                                                                  \
   11934        uint64x2_t a_ = (a);                                             \
   11935        uint32x2_t result;                                               \
   11936        __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
   11937                 : "=w"(result)                                          \
   11938                 : "w"(a_), "i"(b)                                       \
   11939                 : /* No clobbers */);                                   \
   11940        result;                                                          \
   11941      })
   11942 
   11943 #define vsli_n_p8(a, b, c)                                              \
   11944   __extension__                                                         \
   11945     ({                                                                  \
   11946        poly8x8_t b_ = (b);                                              \
   11947        poly8x8_t a_ = (a);                                              \
   11948        poly8x8_t result;                                                \
   11949        __asm__ ("sli %0.8b,%2.8b,%3"                                    \
   11950                 : "=w"(result)                                          \
   11951                 : "0"(a_), "w"(b_), "i"(c)                              \
   11952                 : /* No clobbers */);                                   \
   11953        result;                                                          \
   11954      })
   11955 
   11956 #define vsli_n_p16(a, b, c)                                             \
   11957   __extension__                                                         \
   11958     ({                                                                  \
   11959        poly16x4_t b_ = (b);                                             \
   11960        poly16x4_t a_ = (a);                                             \
   11961        poly16x4_t result;                                               \
   11962        __asm__ ("sli %0.4h,%2.4h,%3"                                    \
   11963                 : "=w"(result)                                          \
   11964                 : "0"(a_), "w"(b_), "i"(c)                              \
   11965                 : /* No clobbers */);                                   \
   11966        result;                                                          \
   11967      })
   11968 
   11969 #define vsliq_n_p8(a, b, c)                                             \
   11970   __extension__                                                         \
   11971     ({                                                                  \
   11972        poly8x16_t b_ = (b);                                             \
   11973        poly8x16_t a_ = (a);                                             \
   11974        poly8x16_t result;                                               \
   11975        __asm__ ("sli %0.16b,%2.16b,%3"                                  \
   11976                 : "=w"(result)                                          \
   11977                 : "0"(a_), "w"(b_), "i"(c)                              \
   11978                 : /* No clobbers */);                                   \
   11979        result;                                                          \
   11980      })
   11981 
   11982 #define vsliq_n_p16(a, b, c)                                            \
   11983   __extension__                                                         \
   11984     ({                                                                  \
   11985        poly16x8_t b_ = (b);                                             \
   11986        poly16x8_t a_ = (a);                                             \
   11987        poly16x8_t result;                                               \
   11988        __asm__ ("sli %0.8h,%2.8h,%3"                                    \
   11989                 : "=w"(result)                                          \
   11990                 : "0"(a_), "w"(b_), "i"(c)                              \
   11991                 : /* No clobbers */);                                   \
   11992        result;                                                          \
   11993      })
   11994 
   11995 #define vsri_n_p8(a, b, c)                                              \
   11996   __extension__                                                         \
   11997     ({                                                                  \
   11998        poly8x8_t b_ = (b);                                              \
   11999        poly8x8_t a_ = (a);                                              \
   12000        poly8x8_t result;                                                \
   12001        __asm__ ("sri %0.8b,%2.8b,%3"                                    \
   12002                 : "=w"(result)                                          \
   12003                 : "0"(a_), "w"(b_), "i"(c)                              \
   12004                 : /* No clobbers */);                                   \
   12005        result;                                                          \
   12006      })
   12007 
   12008 #define vsri_n_p16(a, b, c)                                             \
   12009   __extension__                                                         \
   12010     ({                                                                  \
   12011        poly16x4_t b_ = (b);                                             \
   12012        poly16x4_t a_ = (a);                                             \
   12013        poly16x4_t result;                                               \
   12014        __asm__ ("sri %0.4h,%2.4h,%3"                                    \
   12015                 : "=w"(result)                                          \
   12016                 : "0"(a_), "w"(b_), "i"(c)                              \
   12017                 : /* No clobbers */);                                   \
   12018        result;                                                          \
   12019      })
   12020 
   12021 #define vsriq_n_p8(a, b, c)                                             \
   12022   __extension__                                                         \
   12023     ({                                                                  \
   12024        poly8x16_t b_ = (b);                                             \
   12025        poly8x16_t a_ = (a);                                             \
   12026        poly8x16_t result;                                               \
   12027        __asm__ ("sri %0.16b,%2.16b,%3"                                  \
   12028                 : "=w"(result)                                          \
   12029                 : "0"(a_), "w"(b_), "i"(c)                              \
   12030                 : /* No clobbers */);                                   \
   12031        result;                                                          \
   12032      })
   12033 
   12034 #define vsriq_n_p16(a, b, c)                                            \
   12035   __extension__                                                         \
   12036     ({                                                                  \
   12037        poly16x8_t b_ = (b);                                             \
   12038        poly16x8_t a_ = (a);                                             \
   12039        poly16x8_t result;                                               \
   12040        __asm__ ("sri %0.8h,%2.8h,%3"                                    \
   12041                 : "=w"(result)                                          \
   12042                 : "0"(a_), "w"(b_), "i"(c)                              \
   12043                 : /* No clobbers */);                                   \
   12044        result;                                                          \
   12045      })
   12046 
   12047 #define vst1_lane_f32(a, b, c)                                          \
   12048   __extension__                                                         \
   12049     ({                                                                  \
   12050        float32x2_t b_ = (b);                                            \
   12051        float32_t * a_ = (a);                                            \
   12052        __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
   12053                 :                                                       \
   12054                 : "r"(a_), "w"(b_), "i"(c)                              \
   12055                 : "memory");                                            \
   12056      })
   12057 
   12058 #define vst1_lane_f64(a, b, c)                                          \
   12059   __extension__                                                         \
   12060     ({                                                                  \
   12061        float64x1_t b_ = (b);                                            \
   12062        float64_t * a_ = (a);                                            \
   12063        __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
   12064                 :                                                       \
   12065                 : "r"(a_), "w"(b_), "i"(c)                              \
   12066                 : "memory");                                            \
   12067      })
   12068 
   12069 #define vst1_lane_p8(a, b, c)                                           \
   12070   __extension__                                                         \
   12071     ({                                                                  \
   12072        poly8x8_t b_ = (b);                                              \
   12073        poly8_t * a_ = (a);                                              \
   12074        __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
   12075                 :                                                       \
   12076                 : "r"(a_), "w"(b_), "i"(c)                              \
   12077                 : "memory");                                            \
   12078      })
   12079 
   12080 #define vst1_lane_p16(a, b, c)                                          \
   12081   __extension__                                                         \
   12082     ({                                                                  \
   12083        poly16x4_t b_ = (b);                                             \
   12084        poly16_t * a_ = (a);                                             \
   12085        __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
   12086                 :                                                       \
   12087                 : "r"(a_), "w"(b_), "i"(c)                              \
   12088                 : "memory");                                            \
   12089      })
   12090 
   12091 #define vst1_lane_s8(a, b, c)                                           \
   12092   __extension__                                                         \
   12093     ({                                                                  \
   12094        int8x8_t b_ = (b);                                               \
   12095        int8_t * a_ = (a);                                               \
   12096        __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
   12097                 :                                                       \
   12098                 : "r"(a_), "w"(b_), "i"(c)                              \
   12099                 : "memory");                                            \
   12100      })
   12101 
   12102 #define vst1_lane_s16(a, b, c)                                          \
   12103   __extension__                                                         \
   12104     ({                                                                  \
   12105        int16x4_t b_ = (b);                                              \
   12106        int16_t * a_ = (a);                                              \
   12107        __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
   12108                 :                                                       \
   12109                 : "r"(a_), "w"(b_), "i"(c)                              \
   12110                 : "memory");                                            \
   12111      })
   12112 
   12113 #define vst1_lane_s32(a, b, c)                                          \
   12114   __extension__                                                         \
   12115     ({                                                                  \
   12116        int32x2_t b_ = (b);                                              \
   12117        int32_t * a_ = (a);                                              \
   12118        __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
   12119                 :                                                       \
   12120                 : "r"(a_), "w"(b_), "i"(c)                              \
   12121                 : "memory");                                            \
   12122      })
   12123 
   12124 #define vst1_lane_s64(a, b, c)                                          \
   12125   __extension__                                                         \
   12126     ({                                                                  \
   12127        int64x1_t b_ = (b);                                              \
   12128        int64_t * a_ = (a);                                              \
   12129        __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
   12130                 :                                                       \
   12131                 : "r"(a_), "w"(b_), "i"(c)                              \
   12132                 : "memory");                                            \
   12133      })
   12134 
   12135 #define vst1_lane_u8(a, b, c)                                           \
   12136   __extension__                                                         \
   12137     ({                                                                  \
   12138        uint8x8_t b_ = (b);                                              \
   12139        uint8_t * a_ = (a);                                              \
   12140        __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
   12141                 :                                                       \
   12142                 : "r"(a_), "w"(b_), "i"(c)                              \
   12143                 : "memory");                                            \
   12144      })
   12145 
   12146 #define vst1_lane_u16(a, b, c)                                          \
   12147   __extension__                                                         \
   12148     ({                                                                  \
   12149        uint16x4_t b_ = (b);                                             \
   12150        uint16_t * a_ = (a);                                             \
   12151        __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
   12152                 :                                                       \
   12153                 : "r"(a_), "w"(b_), "i"(c)                              \
   12154                 : "memory");                                            \
   12155      })
   12156 
   12157 #define vst1_lane_u32(a, b, c)                                          \
   12158   __extension__                                                         \
   12159     ({                                                                  \
   12160        uint32x2_t b_ = (b);                                             \
   12161        uint32_t * a_ = (a);                                             \
   12162        __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
   12163                 :                                                       \
   12164                 : "r"(a_), "w"(b_), "i"(c)                              \
   12165                 : "memory");                                            \
   12166      })
   12167 
   12168 #define vst1_lane_u64(a, b, c)                                          \
   12169   __extension__                                                         \
   12170     ({                                                                  \
   12171        uint64x1_t b_ = (b);                                             \
   12172        uint64_t * a_ = (a);                                             \
   12173        __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
   12174                 :                                                       \
   12175                 : "r"(a_), "w"(b_), "i"(c)                              \
   12176                 : "memory");                                            \
   12177      })
   12178 
   12179 
   12180 #define vst1q_lane_f32(a, b, c)                                         \
   12181   __extension__                                                         \
   12182     ({                                                                  \
   12183        float32x4_t b_ = (b);                                            \
   12184        float32_t * a_ = (a);                                            \
   12185        __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
   12186                 :                                                       \
   12187                 : "r"(a_), "w"(b_), "i"(c)                              \
   12188                 : "memory");                                            \
   12189      })
   12190 
   12191 #define vst1q_lane_f64(a, b, c)                                         \
   12192   __extension__                                                         \
   12193     ({                                                                  \
   12194        float64x2_t b_ = (b);                                            \
   12195        float64_t * a_ = (a);                                            \
   12196        __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
   12197                 :                                                       \
   12198                 : "r"(a_), "w"(b_), "i"(c)                              \
   12199                 : "memory");                                            \
   12200      })
   12201 
   12202 #define vst1q_lane_p8(a, b, c)                                          \
   12203   __extension__                                                         \
   12204     ({                                                                  \
   12205        poly8x16_t b_ = (b);                                             \
   12206        poly8_t * a_ = (a);                                              \
   12207        __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
   12208                 :                                                       \
   12209                 : "r"(a_), "w"(b_), "i"(c)                              \
   12210                 : "memory");                                            \
   12211      })
   12212 
   12213 #define vst1q_lane_p16(a, b, c)                                         \
   12214   __extension__                                                         \
   12215     ({                                                                  \
   12216        poly16x8_t b_ = (b);                                             \
   12217        poly16_t * a_ = (a);                                             \
   12218        __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
   12219                 :                                                       \
   12220                 : "r"(a_), "w"(b_), "i"(c)                              \
   12221                 : "memory");                                            \
   12222      })
   12223 
   12224 #define vst1q_lane_s8(a, b, c)                                          \
   12225   __extension__                                                         \
   12226     ({                                                                  \
   12227        int8x16_t b_ = (b);                                              \
   12228        int8_t * a_ = (a);                                               \
   12229        __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
   12230                 :                                                       \
   12231                 : "r"(a_), "w"(b_), "i"(c)                              \
   12232                 : "memory");                                            \
   12233      })
   12234 
   12235 #define vst1q_lane_s16(a, b, c)                                         \
   12236   __extension__                                                         \
   12237     ({                                                                  \
   12238        int16x8_t b_ = (b);                                              \
   12239        int16_t * a_ = (a);                                              \
   12240        __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
   12241                 :                                                       \
   12242                 : "r"(a_), "w"(b_), "i"(c)                              \
   12243                 : "memory");                                            \
   12244      })
   12245 
   12246 #define vst1q_lane_s32(a, b, c)                                         \
   12247   __extension__                                                         \
   12248     ({                                                                  \
   12249        int32x4_t b_ = (b);                                              \
   12250        int32_t * a_ = (a);                                              \
   12251        __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
   12252                 :                                                       \
   12253                 : "r"(a_), "w"(b_), "i"(c)                              \
   12254                 : "memory");                                            \
   12255      })
   12256 
   12257 #define vst1q_lane_s64(a, b, c)                                         \
   12258   __extension__                                                         \
   12259     ({                                                                  \
   12260        int64x2_t b_ = (b);                                              \
   12261        int64_t * a_ = (a);                                              \
   12262        __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
   12263                 :                                                       \
   12264                 : "r"(a_), "w"(b_), "i"(c)                              \
   12265                 : "memory");                                            \
   12266      })
   12267 
   12268 #define vst1q_lane_u8(a, b, c)                                          \
   12269   __extension__                                                         \
   12270     ({                                                                  \
   12271        uint8x16_t b_ = (b);                                             \
   12272        uint8_t * a_ = (a);                                              \
   12273        __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
   12274                 :                                                       \
   12275                 : "r"(a_), "w"(b_), "i"(c)                              \
   12276                 : "memory");                                            \
   12277      })
   12278 
   12279 #define vst1q_lane_u16(a, b, c)                                         \
   12280   __extension__                                                         \
   12281     ({                                                                  \
   12282        uint16x8_t b_ = (b);                                             \
   12283        uint16_t * a_ = (a);                                             \
   12284        __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
   12285                 :                                                       \
   12286                 : "r"(a_), "w"(b_), "i"(c)                              \
   12287                 : "memory");                                            \
   12288      })
   12289 
   12290 #define vst1q_lane_u32(a, b, c)                                         \
   12291   __extension__                                                         \
   12292     ({                                                                  \
   12293        uint32x4_t b_ = (b);                                             \
   12294        uint32_t * a_ = (a);                                             \
   12295        __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
   12296                 :                                                       \
   12297                 : "r"(a_), "w"(b_), "i"(c)                              \
   12298                 : "memory");                                            \
   12299      })
   12300 
   12301 #define vst1q_lane_u64(a, b, c)                                         \
   12302   __extension__                                                         \
   12303     ({                                                                  \
   12304        uint64x2_t b_ = (b);                                             \
   12305        uint64_t * a_ = (a);                                             \
   12306        __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
   12307                 :                                                       \
   12308                 : "r"(a_), "w"(b_), "i"(c)                              \
   12309                 : "memory");                                            \
   12310      })
   12311 
   12312 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
   12313 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
   12314 {
   12315   int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
   12316   __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
   12317            : "+w"(result)
   12318            : "w"(b), "w"(c)
   12319            : /* No clobbers */);
   12320   return result;
   12321 }
   12322 
   12323 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   12324 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
   12325 {
   12326   int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
   12327   __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
   12328            : "+w"(result)
   12329            : "w"(b), "w"(c)
   12330            : /* No clobbers */);
   12331   return result;
   12332 }
   12333 
/* vsubhn_high_<type>: subtract, narrow to the high half of each wide
   element (the SUBHN2 instruction), and place the narrowed lanes in
   the UPPER half of the returned vector; the lower half is `a`.
   `result` is pre-loaded with `a` in its low half (high half zeroed
   via vcreate), and the "+w" read-write constraint ties the asm
   output to that register so the low half survives — SUBHN2 only
   writes the upper half.  Inline asm is used here because no GCC
   builtin existed for the "2" (upper-half) variants.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
   12377 
/* vsubhn_<type>: subtract and narrow (SUBHN).  Each result lane is
   the most-significant half of (a[i] - b[i]); the element width is
   halved, so two quadword inputs yield one 64-bit vector.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsubhn_s16 (int16x8_t a, int16x8_t b)
{
  int8x8_t result;
  __asm__ ("subhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsubhn_s32 (int32x4_t a, int32x4_t b)
{
  int16x4_t result;
  __asm__ ("subhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsubhn_s64 (int64x2_t a, int64x2_t b)
{
  int32x2_t result;
  __asm__ ("subhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsubhn_u16 (uint16x8_t a, uint16x8_t b)
{
  uint8x8_t result;
  __asm__ ("subhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsubhn_u32 (uint32x4_t a, uint32x4_t b)
{
  uint16x4_t result;
  __asm__ ("subhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsubhn_u64 (uint64x2_t a, uint64x2_t b)
{
  uint32x2_t result;
  __asm__ ("subhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   12443 
/* vtrn1_<type>: transpose-interleave of EVEN-indexed lanes (TRN1),
   64-bit vector forms.  Per ACLE: result[2i] = a[2i] and
   result[2i+1] = b[2i].  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn1_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn1_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn1_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn1_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn1_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn1_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn1_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn1_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn1_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   12542 
/* vtrn1q_<type>: transpose-interleave of EVEN-indexed lanes (TRN1),
   128-bit vector forms.  result[2i] = a[2i], result[2i+1] = b[2i].  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn1q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn1q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn1q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn1q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn1q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn1q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   12674 
/* vtrn2_<type>: transpose-interleave of ODD-indexed lanes (TRN2),
   64-bit vector forms.  Per ACLE: result[2i] = a[2i+1] and
   result[2i+1] = b[2i+1].  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn2_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn2_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn2_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn2_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn2_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn2_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn2_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn2_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn2_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   12773 
/* vtrn2q_<type>: transpose-interleave of ODD-indexed lanes (TRN2),
   128-bit vector forms.  result[2i] = a[2i+1], result[2i+1] = b[2i+1].  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn2q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn2q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn2q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn2q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn2q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn2q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   12905 
/* vtst[q]_p8/p16: per-lane bit test (CMTST).  Each result lane is
   all-ones when (a[i] & b[i]) != 0, otherwise zero, which is why the
   polynomial inputs map to unsigned (mask) result types.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_p8 (poly8x8_t a, poly8x8_t b)
{
  uint8x8_t result;
  __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_p16 (poly16x4_t a, poly16x4_t b)
{
  uint16x4_t result;
  __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_p8 (poly8x16_t a, poly8x16_t b)
{
  uint8x16_t result;
  __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_p16 (poly16x8_t a, poly16x8_t b)
{
  uint16x8_t result;
  __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vuzp1_<type>: unzip EVEN-indexed lanes (UZP1), 64-bit vector forms.
   The result holds the even-indexed elements of the concatenation
   a:b — i.e. a[0], a[2], ..., b[0], b[2], ...  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp1_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp1_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp1_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp1_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp1_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp1_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp1_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp1_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp1_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   13047 
/* vuzp1q_<type>: unzip EVEN-indexed lanes (UZP1), 128-bit vector
   forms.  Result = even-indexed elements of the concatenation a:b.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp1q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp1q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp1q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp1q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp1q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp1q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   13179 
/* vuzp2_<type>: unzip ODD-indexed lanes (UZP2), 64-bit vector forms.
   The result holds the odd-indexed elements of the concatenation
   a:b — i.e. a[1], a[3], ..., b[1], b[3], ...  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp2_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp2_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp2_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp2_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp2_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp2_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp2_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp2_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp2_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   13278 
/* vuzp2q_<type>: unzip ODD-indexed lanes (UZP2), 128-bit vector
   forms.  Result = odd-indexed elements of the concatenation a:b.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp2q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp2q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp2q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp2q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   13344 
   13345 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   13346 vuzp2q_s32 (int32x4_t a, int32x4_t b)
   13347 {
   13348   int32x4_t result;
   13349   __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
   13350            : "=w"(result)
   13351            : "w"(a), "w"(b)
   13352            : /* No clobbers */);
   13353   return result;
   13354 }
   13355 
   13356 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   13357 vuzp2q_s64 (int64x2_t a, int64x2_t b)
   13358 {
   13359   int64x2_t result;
   13360   __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
   13361            : "=w"(result)
   13362            : "w"(a), "w"(b)
   13363            : /* No clobbers */);
   13364   return result;
   13365 }
   13366 
   13367 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   13368 vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
   13369 {
   13370   uint8x16_t result;
   13371   __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
   13372            : "=w"(result)
   13373            : "w"(a), "w"(b)
   13374            : /* No clobbers */);
   13375   return result;
   13376 }
   13377 
   13378 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   13379 vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
   13380 {
   13381   uint16x8_t result;
   13382   __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
   13383            : "=w"(result)
   13384            : "w"(a), "w"(b)
   13385            : /* No clobbers */);
   13386   return result;
   13387 }
   13388 
   13389 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   13390 vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
   13391 {
   13392   uint32x4_t result;
   13393   __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
   13394            : "=w"(result)
   13395            : "w"(a), "w"(b)
   13396            : /* No clobbers */);
   13397   return result;
   13398 }
   13399 
   13400 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   13401 vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
   13402 {
   13403   uint64x2_t result;
   13404   __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
   13405            : "=w"(result)
   13406            : "w"(a), "w"(b)
   13407            : /* No clobbers */);
   13408   return result;
   13409 }
   13410 
/* Temporary inline asm implementations of the 64-bit (D-register) vzip1
   intrinsics.  The AArch64 ZIP1 instruction interleaves the elements of
   the LOW halves of the two operands:
   result = { a[0], b[0], a[1], b[1], ... }.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip1_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip1_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip1_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip1_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip1_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip1_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip1_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip1_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip1_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   13509 
/* Temporary inline asm implementations of the 128-bit (Q-register) vzip1q
   intrinsics.  ZIP1 interleaves the elements of the LOW halves of the two
   operands: result = { a[0], b[0], a[1], b[1], ... }.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip1q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip1q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip1q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip1q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip1q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip1q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip1q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip1q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip1q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip1q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip1q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   13641 
/* Temporary inline asm implementations of the 64-bit (D-register) vzip2
   intrinsics.  ZIP2 interleaves the elements of the HIGH halves of the two
   operands: result = { a[n/2], b[n/2], a[n/2+1], b[n/2+1], ... }.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip2_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip2_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip2_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip2_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip2_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip2_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip2_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip2_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip2_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   13740 
/* Temporary inline asm implementations of the 128-bit (Q-register) vzip2q
   intrinsics.  ZIP2 interleaves the elements of the HIGH halves of the two
   operands: result = { a[n/2], b[n/2], a[n/2+1], b[n/2+1], ... }.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip2q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip2q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip2q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip2q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip2q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip2q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip2q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip2q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip2q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip2q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip2q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip2q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   13872 
   13873 /* End of temporary inline asm implementations.  */
   13874 
   13875 /* Start of temporary inline asm for vldn, vstn and friends.  */
   13876 
   13877 /* Create struct element types for duplicating loads.
   13878 
   13879    Create 2 element structures of:
   13880 
   13881    +------+----+----+----+----+
   13882    |      | 8  | 16 | 32 | 64 |
   13883    +------+----+----+----+----+
   13884    |int   | Y  | Y  | N  | N  |
   13885    +------+----+----+----+----+
   13886    |uint  | Y  | Y  | N  | N  |
   13887    +------+----+----+----+----+
   13888    |float | -  | -  | N  | N  |
   13889    +------+----+----+----+----+
   13890    |poly  | Y  | Y  | -  | -  |
   13891    +------+----+----+----+----+
   13892 
   13893    Create 3 element structures of:
   13894 
   13895    +------+----+----+----+----+
   13896    |      | 8  | 16 | 32 | 64 |
   13897    +------+----+----+----+----+
   13898    |int   | Y  | Y  | Y  | Y  |
   13899    +------+----+----+----+----+
   13900    |uint  | Y  | Y  | Y  | Y  |
   13901    +------+----+----+----+----+
   13902    |float | -  | -  | Y  | Y  |
   13903    +------+----+----+----+----+
   13904    |poly  | Y  | Y  | -  | -  |
   13905    +------+----+----+----+----+
   13906 
   13907    Create 4 element structures of:
   13908 
   13909    +------+----+----+----+----+
   13910    |      | 8  | 16 | 32 | 64 |
   13911    +------+----+----+----+----+
   13912    |int   | Y  | N  | N  | Y  |
   13913    +------+----+----+----+----+
   13914    |uint  | Y  | N  | N  | Y  |
   13915    +------+----+----+----+----+
   13916    |float | -  | -  | N  | Y  |
   13917    +------+----+----+----+----+
   13918    |poly  | Y  | N  | -  | -  |
   13919    +------+----+----+----+----+
   13920 
   13921   This is required for casting memory reference.  */
/* __STRUCTN (t, sz, nelem) defines the plain structure type
   t<sz>x<nelem>_t, holding NELEM scalar t<sz>_t values in an array.
   These exist solely so the vldN/vstN inline asm templates below can cast
   a scalar pointer to a memory operand covering the full structure width;
   only the type/size/count combinations those templates actually need are
   instantiated (per the Y entries in the tables above).  */
#define __STRUCTN(t, sz, nelem)			\
  typedef struct t ## sz ## x ## nelem ## _t {	\
    t ## sz ## _t val[nelem];			\
  }  t ## sz ## x ## nelem ## _t;

/* 2-element structs.  */
__STRUCTN (int, 8, 2)
__STRUCTN (int, 16, 2)
__STRUCTN (uint, 8, 2)
__STRUCTN (uint, 16, 2)
__STRUCTN (poly, 8, 2)
__STRUCTN (poly, 16, 2)
/* 3-element structs.  */
__STRUCTN (int, 8, 3)
__STRUCTN (int, 16, 3)
__STRUCTN (int, 32, 3)
__STRUCTN (int, 64, 3)
__STRUCTN (uint, 8, 3)
__STRUCTN (uint, 16, 3)
__STRUCTN (uint, 32, 3)
__STRUCTN (uint, 64, 3)
__STRUCTN (float, 32, 3)
__STRUCTN (float, 64, 3)
__STRUCTN (poly, 8, 3)
__STRUCTN (poly, 16, 3)
/* 4-element structs.  */
__STRUCTN (int, 8, 4)
__STRUCTN (int, 64, 4)
__STRUCTN (uint, 8, 4)
__STRUCTN (uint, 64, 4)
__STRUCTN (poly, 8, 4)
__STRUCTN (float, 64, 4)
/* Scratch macro no longer needed once the types exist.  */
#undef __STRUCTN
   13955 
/* __LD2R_FUNC expands to vld2{Q}_dup_<funcsuffix>: load one two-element
   structure from PTR and replicate it across all lanes of a register
   pair.  The template uses LD2R into the hard-coded registers v16/v17 and
   then ST1 to copy the pair into RESULT through memory; v16/v17 and
   "memory" are therefore clobbered.  PTR is cast to STRUCTTYPE so the "Q"
   input constraint describes the whole region the LD2R reads.  */
#define __LD2R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__)) 					\
  vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17");					\
    return result;							\
  }
   13970 
/* vld2_dup_* — 64-bit (D-register) variants.  */
__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
/* vld2q_dup_* — 128-bit (Q-register) variants.  */
__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
   13995 
/* __LD2_LANE_FUNC expands to vld2{Q}_lane_<funcsuffix>: start from the
   existing register pair B, overwrite lane C of both vectors with the
   two-element structure at PTR, and return the updated pair.  The asm
   loads B into v16/v17 with LD1, performs the single-lane LD2 into
   lane %3 of v16/v17, then stores the pair to RESULT with ST1.  C must be
   a compile-time constant ("i" constraint); v16/v17 and "memory" are
   clobbered.  */
#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     rettype b, const int c)		\
  {									\
    rettype result;							\
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t"	\
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
	     : "memory", "v16", "v17");					\
    return result;							\
  }
   14012 
   14013 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
/* vld2_lane_* — 64-bit (D-register) variants.  */
__LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
__LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
__LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
__LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
__LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
__LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
__LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
__LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
__LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
__LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
__LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
/* vld2q_lane_* — 128-bit (Q-register) variants.  */
__LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
__LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
__LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
__LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
__LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
__LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
__LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
__LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
__LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
__LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
__LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
__LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
   14037 
/* __LD3R_FUNC expands to vld3{Q}_dup_<funcsuffix>: load one three-element
   structure from PTR and replicate it across all lanes of three vectors.
   Same scheme as __LD2R_FUNC, using the fixed register range v16-v18
   (LD3R to load-and-replicate, ST1 to copy into RESULT via memory).  */
#define __LD3R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17", "v18");				\
    return result;							\
  }
   14052 
/* vld3_dup_* — 64-bit (D-register) variants.  */
__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
/* vld3q_dup_* — 128-bit (Q-register) variants.  */
__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
   14077 
/* __LD3_LANE_FUNC expands to vld3{Q}_lane_<funcsuffix>: start from the
   existing register triple B, overwrite lane C of all three vectors with
   the three-element structure at PTR, and return the updated triple.
   Same scheme as __LD2_LANE_FUNC, using the fixed register range v16-v18.
   C must be a compile-time constant ("i" constraint).  */
#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     rettype b, const int c)		\
  {									\
    rettype result;							\
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
	     "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t"	\
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
	     : "memory", "v16", "v17", "v18");				\
    return result;							\
  }
   14094 
   14095 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
/* vld3_lane_* — 64-bit (D-register) variants.  */
__LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
__LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
__LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
__LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
__LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
__LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
__LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
__LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
__LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
__LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
__LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
/* vld3q_lane_* — 128-bit (Q-register) variants.  */
__LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
__LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
__LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
__LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
__LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
__LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
__LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
__LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
__LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
__LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
__LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
__LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
   14119 
/* Emit a vld4<Q>_dup_<funcsuffix> intrinsic: LD4R loads one 4-element
   structure from PTR and replicates each element across all lanes of
   the corresponding result register, then v16-v19 are spilled through
   memory into the returned rettype aggregate.
   - structtype describes the memory the asm reads: 4 consecutive
     ptrtype elements (LD4R reads exactly 4 elements for both the
     64-bit and the 128-bit "q" forms).
   - Hard registers v16-v19 are used because LD4R requires consecutive
     registers; they appear in the clobber list.
   - "memory" clobber: the asm reads *ptr and writes the result via its
     memory output operand.  */
#define __LD4R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17", "v18", "v19");			\
    return result;							\
  }
   14134 
/* vld4_dup / vld4q_dup instantiations.  The second argument sizes the
   memory operand to the 4 elements LD4R actually reads, for both the
   64-bit and the "q" forms.
   NOTE(review): names such as int8x4_t and float64x4_t are not ACLE
   vector types; they are presumably declared earlier in this header --
   confirm, since a missing declaration would break compilation of
   these inline functions.  */
__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
   14159 
/* Emit a vld4<Q>_lane_<funcsuffix> intrinsic: load the incoming
   aggregate B into v16-v19, overwrite lane C of each register with the
   4 elements read from PTR (LD4 single-structure form), and return the
   updated aggregate via a store of v16-v19.
   NOTE(review): the "Q"(*(const rettype *)ptr) input declares that the
   asm may read sizeof(rettype) bytes at PTR, although LD4 reads only 4
   elements -- the constraint over-states the read; confirm this is
   harmless for all callers or tighten it.  */
#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     rettype b, const int c)		\
  {									\
    rettype result;							\
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
	     "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t"	\
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
	     : "memory", "v16", "v17", "v18", "v19");			\
    return result;							\
  }
   14176 
   14177 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
   14178 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
   14179 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
   14180 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
   14181 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
   14182 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
   14183 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
   14184 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
   14185 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
   14186 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
   14187 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
   14188 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
   14189 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
   14190 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
   14191 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
   14192 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
   14193 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
   14194 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
   14195 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
   14196 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
   14197 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
   14198 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
   14199 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
   14200 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
   14201 
/* Emit a vst2<Q>_lane_<funcsuffix> intrinsic: load v16/v17 from the
   aggregate B, then ST2 stores lane C of each register to PTR (two
   interleaved elements).  A helper struct of 2 ptrtype elements gives
   the "=Q" output operand exactly the memory the asm writes, so no
   "memory" clobber is needed.  v16/v17 are fixed because ST2 needs a
   consecutive register pair.  */
#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype;	\
  __extension__ static __inline void					\
  __attribute__ ((__always_inline__))					\
  vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,			\
				     intype b, const int c)		\
  {									\
    __ST2_LANE_STRUCTURE_##intype *__p =				\
				(__ST2_LANE_STRUCTURE_##intype *)ptr;	\
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t"	\
	     : "=Q"(*__p)						\
	     : "Q"(b), "i"(c)						\
	     : "v16", "v17");						\
  }
   14218 
/* vst2_lane / vst2q_lane instantiations: 64-bit (D-register) forms
   first, then the 128-bit "q" forms.  */
__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
   14243 
/* Emit a vst3<Q>_lane_<funcsuffix> intrinsic: load v16-v18 from the
   aggregate B, then ST3 stores lane C of each register to PTR (three
   interleaved elements).  The 3-element helper struct sizes the "=Q"
   output operand to exactly the memory written, so no "memory" clobber
   is needed.  v16-v18 are fixed because ST3 needs consecutive
   registers.  */
#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype;	\
  __extension__ static __inline void					\
  __attribute__ ((__always_inline__))					\
  vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,			\
				     intype b, const int c)		\
  {									\
    __ST3_LANE_STRUCTURE_##intype *__p =				\
				(__ST3_LANE_STRUCTURE_##intype *)ptr;	\
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
	     "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t"	\
	     : "=Q"(*__p)						\
	     : "Q"(b), "i"(c)						\
	     : "v16", "v17", "v18");					\
  }
   14260 
/* vst3_lane / vst3q_lane instantiations: 64-bit (D-register) forms
   first, then the 128-bit "q" forms.  */
__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
   14285 
/* Emit a vst4<Q>_lane_<funcsuffix> intrinsic: load v16-v19 from the
   aggregate B, then ST4 stores lane C of each register to PTR (four
   interleaved elements).  The 4-element helper struct sizes the "=Q"
   output operand to exactly the memory written, so no "memory" clobber
   is needed.  v16-v19 are fixed because ST4 needs consecutive
   registers.  */
#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype;	\
  __extension__ static __inline void					\
  __attribute__ ((__always_inline__))					\
  vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,			\
				     intype b, const int c)		\
  {									\
    __ST4_LANE_STRUCTURE_##intype *__p =				\
				(__ST4_LANE_STRUCTURE_##intype *)ptr;	\
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
	     "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t"	\
	     : "=Q"(*__p)						\
	     : "Q"(b), "i"(c)						\
	     : "v16", "v17", "v18", "v19");				\
  }
   14302 
/* vst4_lane / vst4q_lane instantiations: 64-bit (D-register) forms
   first, then the 128-bit "q" forms.  */
__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
   14327 
/* Sum the two signed 32-bit lanes of A into a 64-bit result.
   SADDLP (signed add long pairwise) on a .2s input yields a single
   .1d element, i.e. the widened sum of the pair.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlv_s32 (int32x2_t a)
{
  int64_t result;
  __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}
   14335 
/* Sum the two unsigned 32-bit lanes of A into a 64-bit result.
   UADDLP (unsigned add long pairwise) on a .2s input yields a single
   .1d element, i.e. the widened sum of the pair.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddlv_u32 (uint32x2_t a)
{
  uint64_t result;
  __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}
   14343 
   14344 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   14345 vpaddd_s64 (int64x2_t __a)
   14346 {
   14347   return __builtin_aarch64_addpdi (__a);
   14348 }
   14349 
   14350 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   14351 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
   14352 {
   14353   return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
   14354 }
   14355 
   14356 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   14357 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
   14358 {
   14359   return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
   14360 }
   14361 
   14362 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   14363 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
   14364 {
   14365   return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
   14366 }
   14367 
   14368 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   14369 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
   14370 {
   14371   return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
   14372 }
   14373 
   14374 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   14375 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
   14376 {
   14377   return  __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
   14378 }
   14379 
   14380 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   14381 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
   14382 {
   14383   return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
   14384 }
   14385 
   14386 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   14387 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
   14388 {
   14389   return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
   14390 }
   14391 
   14392 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   14393 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
   14394 {
   14395   return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
   14396 }
   14397 
   14398 /* Table intrinsics.  */
   14399 
/* vqtbl1 / vqtbl1q: one-register table lookup (TBL).  Result byte i is
   a[b[i]] when b[i] < 16, else 0.  Operands are passed by value in
   vector registers; no clobbers.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
{
  poly8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl1_s8 (int8x16_t a, uint8x8_t b)
{
  int8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* 128-bit forms: 16 index bytes, same single 16-byte table.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
{
  poly8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
{
  int8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
   14465 
/* vqtbl2 / vqtbl2q: two-register table lookup.  The x2 table aggregate
   is passed in memory ("Q") and loaded into the fixed consecutive pair
   v16/v17 that TBL requires (hence the register clobbers and the
   "memory" input dependency).  Index bytes >= 32 yield 0.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

/* 128-bit forms: 16 index bytes against the same 32-byte table.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}
   14537 
/* vqtbl3 / vqtbl3q: three-register table lookup.  The x3 table
   aggregate is loaded from memory into the fixed consecutive triple
   v16-v18 required by TBL.  Index bytes >= 48 yield 0.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

/* 128-bit forms: 16 index bytes against the same 48-byte table.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}
   14609 
/* vqtbl4 / vqtbl4q: four-register table lookup.  The x4 table
   aggregate is loaded from memory into the fixed consecutive quadruple
   v16-v19 required by TBL.  Index bytes >= 64 yield 0.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}


/* 128-bit forms: 16 index bytes against the same 64-byte table.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}
   14682 
   14683 
/* vqtbx1 / vqtbx1q: one-register table lookup extension (TBX).
   Like TBL, but result lanes whose index byte is out of range
   (>= 16) keep the corresponding byte of R; hence the "+w"
   read-write constraint on RESULT, which is pre-seeded with R.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* 128-bit forms.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}
   14749 
/* vqtbx2 / vqtbx2q: two-register table lookup extension.  The x2 table
   aggregate is loaded into the fixed pair v16/v17 required by TBX;
   result lanes with index bytes >= 32 keep the corresponding byte of
   R ("+w" read-write result, pre-seeded with R).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}


/* 128-bit forms.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}
   14822 
   14823 
/* vqtbx3 / vqtbx3q: three-register table lookup extension.  The x3
   table aggregate is loaded into the fixed triple v16-v18 required by
   TBX; result lanes with index bytes >= 48 keep the corresponding
   byte of R ("+w" read-write result, pre-seeded with R).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}


/* 128-bit forms.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}
   14896 
   14897 
/* vqtbx4: table lookup extension with a 64-byte (4 x 128-bit) table.
   TAB is loaded into the consecutive fixed registers v16-v19 as required
   by the 4-register TBX form; out-of-range index bytes leave the
   corresponding byte of R unchanged (TBX semantics).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}


/* 128-bit result variants: same 64-byte table, 16 index bytes.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}
   14970 
   14971 /* V7 legacy table intrinsics.  */
   14972 
/* vtbl1: V7-style lookup in an 8-byte table.  The 64-bit table is
   widened to 128 bits with a zero upper half so the single-register A64
   TBL can be used: indices 8-15 then select the zero half and indices
   >= 16 yield zero from TBL itself, reproducing the V7 behaviour of
   returning 0 for any out-of-range index.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl1_s8 (int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
   15008 
/* vtbl2: V7-style lookup in a 16-byte table given as two 8-byte halves.
   The pair is combined into one 128-bit register; single-register TBL
   returns 0 for indices >= 16, which matches the V7 out-of-range
   behaviour for a 16-byte table.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
   15044 
/* vtbl3: V7-style lookup in a 24-byte table given as three 8-byte
   registers.  The three halves are packed into a zero-padded pair of
   128-bit registers loaded into the consecutive fixed registers
   v16-v17 (as the 2-register TBL requires): indices 24-31 select the
   zero padding and indices >= 32 yield zero from TBL, reproducing the
   V7 out-of-range behaviour.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
   15089 
/* vtbl4: V7-style lookup in a 32-byte table given as four 8-byte
   registers, packed into two 128-bit registers loaded into the
   consecutive fixed registers v16-v17 for the 2-register TBL form.
   Indices >= 32 yield zero from TBL, matching V7 semantics.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
   15134 
/* vtbx2: V7-style lookup-with-extension in a 16-byte table given as two
   8-byte halves.  The pair is combined into one 128-bit register;
   single-register TBX leaves result bytes unchanged for indices >= 16,
   which matches the V7 vtbx2 out-of-range behaviour exactly.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
   15170 
/* vtbx4: V7-style lookup-with-extension in a 32-byte table given as
   four 8-byte registers, packed into two 128-bit registers loaded into
   the consecutive fixed registers v16-v17.  The 2-register TBX leaves
   result bytes unchanged for indices >= 32, matching V7 vtbx4.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
   15215 
   15216 /* End of temporary inline asm.  */
   15217 
   15218 /* Start of optimal implementations in approved order.  */
   15219 
   15220 /* vabs  */
   15221 
/* vabs: lane-wise absolute value, one wrapper per element type and
   vector width.  The scalar f64/s64 variants use the generic GCC
   builtins since float64x1_t / int64x1_t are plain scalars here.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vabs_f32 (float32x2_t __a)
{
  return __builtin_aarch64_absv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vabs_f64 (float64x1_t __a)
{
  return __builtin_fabs (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabs_s8 (int8x8_t __a)
{
  return __builtin_aarch64_absv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabs_s16 (int16x4_t __a)
{
  return __builtin_aarch64_absv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabs_s32 (int32x2_t __a)
{
  return __builtin_aarch64_absv2si (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vabs_s64 (int64x1_t __a)
{
  /* NOTE(review): __builtin_llabs (INT64_MIN) overflows, which is
     undefined behaviour in C, whereas the hardware ABS instruction
     would wrap to INT64_MIN — confirm this difference is acceptable.  */
  return __builtin_llabs (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabsq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_absv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabsq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_absv2df (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabsq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_absv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabsq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_absv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabsq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_absv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabsq_s64 (int64x2_t __a)
{
  return __builtin_aarch64_absv2di (__a);
}
   15293 
   15294 /* vadd */
   15295 
/* Scalar 64-bit addition; int64x1_t/uint64x1_t are plain 64-bit
   scalars here, so ordinary C addition suffices.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vaddd_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a + __b;
}
   15307 
   15308 /* vaddv */
   15309 
/* vaddv: across-vector sum reduction.  The reduc_*plus builtins return
   a vector whose lane 0 holds the sum, hence the vget_lane (..., 0)
   extraction.  The unsigned variants go through the signed builtin
   with casts because only one builtin signature exists per mode.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddv_s8 (int8x8_t __a)
{
  return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddv_s16 (int16x4_t __a)
{
  return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddv_s32 (int32x2_t __a)
{
  return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddv_u8 (uint8x8_t __a)
{
  return vget_lane_u8 ((uint8x8_t)
		__builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
		0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddv_u16 (uint16x4_t __a)
{
  return vget_lane_u16 ((uint16x4_t)
		__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
		0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddv_u32 (uint32x2_t __a)
{
  return vget_lane_u32 ((uint32x2_t)
		__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
		0);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddvq_s8 (int8x16_t __a)
{
  return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
			0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddvq_s16 (int16x8_t __a)
{
  return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddvq_s32 (int32x4_t __a)
{
  return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddvq_s64 (int64x2_t __a)
{
  return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddvq_u8 (uint8x16_t __a)
{
  return vgetq_lane_u8 ((uint8x16_t)
		__builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
		0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddvq_u16 (uint16x8_t __a)
{
  return vgetq_lane_u16 ((uint16x8_t)
		__builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
		0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddvq_u32 (uint32x4_t __a)
{
  return vgetq_lane_u32 ((uint32x4_t)
		__builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
		0);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddvq_u64 (uint64x2_t __a)
{
  return vgetq_lane_u64 ((uint64x2_t)
		__builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
		0);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddv_f32 (float32x2_t __a)
{
  float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
  return vget_lane_f32 (__t, 0);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddvq_f32 (float32x4_t __a)
{
  float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
  return vgetq_lane_f32 (__t, 0);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vaddvq_f64 (float64x2_t __a)
{
  float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
  return vgetq_lane_f64 (__t, 0);
}
   15429 
   15430 /* vbsl  */
   15431 
/* vbsl: bitwise select.  Each result bit is taken from __b where the
   corresponding bit of the mask __a is set, and from __c where it is
   clear.  The builtin-name suffix (e.g. _suss, _pupp, _uuuu) encodes
   the signedness/kind of the result and the three operands.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
{
  return __builtin_aarch64_simd_bsldi_suss (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
{
  return __builtin_aarch64_simd_bsldi_uuuu (__a, __b, __c);
}

/* 128-bit variants.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
}
   15569 
   15570 #ifdef __ARM_FEATURE_CRYPTO
   15571 
   15572 /* vaes  */
   15573 
   15574 static __inline uint8x16_t
   15575 vaeseq_u8 (uint8x16_t data, uint8x16_t key)
   15576 {
   15577   return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
   15578 }
   15579 
   15580 static __inline uint8x16_t
   15581 vaesdq_u8 (uint8x16_t data, uint8x16_t key)
   15582 {
   15583   return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
   15584 }
   15585 
   15586 static __inline uint8x16_t
   15587 vaesmcq_u8 (uint8x16_t data)
   15588 {
   15589   return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
   15590 }
   15591 
   15592 static __inline uint8x16_t
   15593 vaesimcq_u8 (uint8x16_t data)
   15594 {
   15595   return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
   15596 }
   15597 
   15598 #endif
   15599 
   15600 /* vcage  */
   15601 
   15602 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   15603 vcages_f32 (float32_t __a, float32_t __b)
   15604 {
   15605   return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
   15606 }
   15607 
/* Lane-wise |__a| >= |__b|; each result lane is all ones or zero
   (GCC vector-comparison semantics).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcage_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) >= vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcageq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) >= vabsq_f32 (__b);
}
   15619 
   15620 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   15621 vcaged_f64 (float64_t __a, float64_t __b)
   15622 {
   15623   return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
   15624 }
   15625 
/* Lane-wise |__a| >= |__b| on doubles; each result lane is all ones or
   zero (GCC vector-comparison semantics).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcageq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) >= vabsq_f64 (__b);
}
   15631 
   15632 /* vcagt  */
   15633 
   15634 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   15635 vcagts_f32 (float32_t __a, float32_t __b)
   15636 {
   15637   return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
   15638 }
   15639 
/* Lane-wise |__a| > |__b|; each result lane is all ones or zero
   (GCC vector-comparison semantics).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcagt_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) > vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcagtq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) > vabsq_f32 (__b);
}
   15651 
   15652 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   15653 vcagtd_f64 (float64_t __a, float64_t __b)
   15654 {
   15655   return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
   15656 }
   15657 
/* Lane-wise |__a| > |__b| on doubles; each result lane is all ones or
   zero (GCC vector-comparison semantics).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcagtq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) > vabsq_f64 (__b);
}
   15663 
   15664 /* vcale  */
   15665 
/* vcale / vcalt: lane-wise absolute compare <= and <; each result lane
   is all ones or zero (GCC vector-comparison semantics).  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcale_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) <= vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaleq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) <= vabsq_f32 (__b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaleq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) <= vabsq_f64 (__b);
}

/* vcalt  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcalt_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) < vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) < vabsq_f32 (__b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) < vabsq_f64 (__b);
}
   15703 
   15704 /* vceq - vector.  */
   15705 
/* vceq: lane-wise equality; each result lane is all ones when the
   lanes compare equal, zero otherwise.  Poly/unsigned variants reuse
   the signed CMEQ builtin via casts; the 64x1 variants are plain
   scalars here, so an ordinary C comparison is used.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceq_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}
   15755 
   15756 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   15757 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
   15758 {
   15759   return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
   15760 						  (int16x4_t) __b);
   15761 }
   15762 
   15763 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   15764 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
   15765 {
   15766   return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
   15767 						  (int32x2_t) __b);
   15768 }
   15769 
   15770 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   15771 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
   15772 {
   15773   return __a == __b ? -1ll : 0ll;
   15774 }
   15775 
   15776 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   15777 vceqq_f32 (float32x4_t __a, float32x4_t __b)
   15778 {
   15779   return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
   15780 }
   15781 
   15782 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   15783 vceqq_f64 (float64x2_t __a, float64x2_t __b)
   15784 {
   15785   return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
   15786 }
   15787 
   15788 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   15789 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
   15790 {
   15791   return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
   15792 						   (int8x16_t) __b);
   15793 }
   15794 
   15795 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   15796 vceqq_s8 (int8x16_t __a, int8x16_t __b)
   15797 {
   15798   return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
   15799 }
   15800 
   15801 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   15802 vceqq_s16 (int16x8_t __a, int16x8_t __b)
   15803 {
   15804   return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
   15805 }
   15806 
   15807 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   15808 vceqq_s32 (int32x4_t __a, int32x4_t __b)
   15809 {
   15810   return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
   15811 }
   15812 
   15813 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   15814 vceqq_s64 (int64x2_t __a, int64x2_t __b)
   15815 {
   15816   return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
   15817 }
   15818 
   15819 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   15820 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
   15821 {
   15822   return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
   15823 						   (int8x16_t) __b);
   15824 }
   15825 
   15826 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   15827 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
   15828 {
   15829   return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
   15830 						  (int16x8_t) __b);
   15831 }
   15832 
   15833 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   15834 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
   15835 {
   15836   return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
   15837 						  (int32x4_t) __b);
   15838 }
   15839 
   15840 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   15841 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
   15842 {
   15843   return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
   15844 						  (int64x2_t) __b);
   15845 }
   15846 
   15847 /* vceq - scalar.  */
   15848 
   15849 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   15850 vceqs_f32 (float32_t __a, float32_t __b)
   15851 {
   15852   return __a == __b ? -1 : 0;
   15853 }
   15854 
   15855 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   15856 vceqd_s64 (int64x1_t __a, int64x1_t __b)
   15857 {
   15858   return __a == __b ? -1ll : 0ll;
   15859 }
   15860 
   15861 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   15862 vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
   15863 {
   15864   return __a == __b ? -1ll : 0ll;
   15865 }
   15866 
   15867 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   15868 vceqd_f64 (float64_t __a, float64_t __b)
   15869 {
   15870   return __a == __b ? -1ll : 0ll;
   15871 }
   15872 
   15873 /* vceqz - vector.  */
   15874 
/* vceqz family: lane-wise compare against zero, all-ones mask in lanes
   equal to zero.  Implemented by materialising a zero vector and reusing
   the cmeq builtins; the 64x1 variants are open-coded on the scalar.
   Poly/unsigned inputs are cast to the same-width signed type to match
   the builtin prototypes (bit pattern unchanged).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_f64 (float64x1_t __a)
{
  return __a == 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_p8 (poly8x8_t __a)
{
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceqz_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_s64 (int64x1_t __a)
{
  return __a == 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_u8 (uint8x8_t __a)
{
  uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceqz_u16 (uint16x4_t __a)
{
  uint16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_u32 (uint32x2_t __a)
{
  uint32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_u64 (uint64x1_t __a)
{
  return __a == 0ll ? -1ll : 0ll;
}

/* 128-bit "q" variants of the same pattern.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqzq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqzq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_p8 (poly8x16_t __a)
{
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqzq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqzq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqzq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_u8 (uint8x16_t __a)
{
  uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqzq_u16 (uint16x8_t __a)
{
  uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqzq_u32 (uint32x4_t __a)
{
  uint32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqzq_u64 (uint64x2_t __a)
{
  uint64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}
   16037 
   16038 /* vceqz - scalar.  */
   16039 
   16040 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   16041 vceqzs_f32 (float32_t __a)
   16042 {
   16043   return __a == 0.0f ? -1 : 0;
   16044 }
   16045 
   16046 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16047 vceqzd_s64 (int64x1_t __a)
   16048 {
   16049   return __a == 0 ? -1ll : 0ll;
   16050 }
   16051 
   16052 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16053 vceqzd_u64 (int64x1_t __a)
   16054 {
   16055   return __a == 0 ? -1ll : 0ll;
   16056 }
   16057 
   16058 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   16059 vceqzd_f64 (float64_t __a)
   16060 {
   16061   return __a == 0.0 ? -1ll : 0ll;
   16062 }
   16063 
   16064 /* vcge - vector.  */
   16065 
/* vcge family: lane-wise compare greater-than-or-equal, all-ones mask
   in lanes where __a >= __b.  Signed/float inputs use the cmge builtins,
   unsigned inputs the cmgeu builtins (casts only satisfy the builtin
   prototypes; the comparison itself is unsigned).  64x1 variants are
   open-coded on the scalar.
   NOTE(review): vcge_p8/vcgeq_p8 apply the *signed* byte comparison to
   polynomial values; ordering of poly types is not obviously meaningful
   -- confirm intent against the ACLE specification.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

/* 128-bit "q" variants of the same pattern.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}
   16206 
   16207 /* vcge - scalar.  */
   16208 
   16209 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   16210 vcges_f32 (float32_t __a, float32_t __b)
   16211 {
   16212   return __a >= __b ? -1 : 0;
   16213 }
   16214 
   16215 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16216 vcged_s64 (int64x1_t __a, int64x1_t __b)
   16217 {
   16218   return __a >= __b ? -1ll : 0ll;
   16219 }
   16220 
   16221 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16222 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
   16223 {
   16224   return __a >= __b ? -1ll : 0ll;
   16225 }
   16226 
   16227 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   16228 vcged_f64 (float64_t __a, float64_t __b)
   16229 {
   16230   return __a >= __b ? -1ll : 0ll;
   16231 }
   16232 
   16233 /* vcgez - vector.  */
   16234 
/* vcgez family: lane-wise compare greater-than-or-equal to zero,
   all-ones mask where __a >= 0, built by materialising a zero vector and
   reusing the cmge/cmgeu builtins; 64x1 variants are open-coded.
   NOTE(review): for the unsigned variants "x >= 0" is trivially true,
   so they always produce all-ones masks; and the p8 variants apply a
   signed ordering to polynomial values.  Both look questionable as
   public intrinsics -- confirm against the ACLE specification.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgez_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgez_f64 (float64x1_t __a)
{
  return __a >= 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgez_p8 (poly8x8_t __a)
{
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgez_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgez_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgez_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgez_s64 (int64x1_t __a)
{
  return __a >= 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgez_u8 (uint8x8_t __a)
{
  uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgez_u16 (uint16x4_t __a)
{
  uint16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgez_u32 (uint32x2_t __a)
{
  uint32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgez_u64 (uint64x1_t __a)
{
  return __a >= 0ll ? -1ll : 0ll;
}

/* 128-bit "q" variants of the same pattern.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgezq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgezq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgezq_p8 (poly8x16_t __a)
{
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgezq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgezq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgezq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgezq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgezq_u8 (uint8x16_t __a)
{
  uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgezq_u16 (uint16x8_t __a)
{
  uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgezq_u32 (uint32x4_t __a)
{
  uint32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgezq_u64 (uint64x2_t __a)
{
  uint64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}
   16397 
   16398 /* vcgez - scalar.  */
   16399 
   16400 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   16401 vcgezs_f32 (float32_t __a)
   16402 {
   16403   return __a >= 0.0f ? -1 : 0;
   16404 }
   16405 
   16406 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16407 vcgezd_s64 (int64x1_t __a)
   16408 {
   16409   return __a >= 0 ? -1ll : 0ll;
   16410 }
   16411 
   16412 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16413 vcgezd_u64 (int64x1_t __a)
   16414 {
   16415   return __a >= 0 ? -1ll : 0ll;
   16416 }
   16417 
   16418 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   16419 vcgezd_f64 (float64_t __a)
   16420 {
   16421   return __a >= 0.0 ? -1ll : 0ll;
   16422 }
   16423 
   16424 /* vcgt - vector.  */
   16425 
/* vcgt family: lane-wise compare strictly greater-than, all-ones mask
   where __a > __b.  Signed/float inputs use the cmgt builtins, unsigned
   inputs the cmgtu builtins; casts only satisfy the builtin prototypes.
   64x1 variants are open-coded on the scalar.
   NOTE(review): vcgt_p8/vcgtq_p8 apply the signed byte comparison to
   polynomial values -- confirm intent against the ACLE specification.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a > __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a > __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a > __b ? -1ll : 0ll;
}

/* 128-bit "q" variants of the same pattern.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}
   16566 
   16567 /* vcgt - scalar.  */
   16568 
   16569 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   16570 vcgts_f32 (float32_t __a, float32_t __b)
   16571 {
   16572   return __a > __b ? -1 : 0;
   16573 }
   16574 
   16575 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16576 vcgtd_s64 (int64x1_t __a, int64x1_t __b)
   16577 {
   16578   return __a > __b ? -1ll : 0ll;
   16579 }
   16580 
   16581 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16582 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
   16583 {
   16584   return __a > __b ? -1ll : 0ll;
   16585 }
   16586 
   16587 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   16588 vcgtd_f64 (float64_t __a, float64_t __b)
   16589 {
   16590   return __a > __b ? -1ll : 0ll;
   16591 }
   16592 
/* vcgtz - vector compare greater-than-zero.  Implemented as a compare
   of __a against an explicit zero vector; each result lane is all-ones
   when the lane is > 0 and zero otherwise.  The 64x1 variants are plain
   scalars in this header, so they use the C conditional directly.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgtz_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtz_f64 (float64x1_t __a)
{
  return __a > 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgtz_p8 (poly8x8_t __a)
{
  /* NOTE(review): poly8 lanes use the signed compare builtin — confirm
     a signed ordering is intended for poly.  */
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgtz_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgtz_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgtz_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtz_s64 (int64x1_t __a)
{
  return __a > 0ll ? -1ll : 0ll;
}

/* Unsigned variants: cmgtu* against zero — equivalent to "lane != 0".  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgtz_u8 (uint8x8_t __a)
{
  uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgtz_u16 (uint16x4_t __a)
{
  uint16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgtz_u32 (uint32x2_t __a)
{
  uint32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtz_u64 (uint64x1_t __a)
{
  /* Unsigned: "> 0" reduces to "!= 0".  */
  return __a > 0ll ? -1ll : 0ll;
}
   16672 
/* vcgtzq - 128-bit vector compare greater-than-zero: compare of __a
   against an explicit zero vector, all-ones lanes where the lane > 0.
   Casts on poly/unsigned inputs only satisfy the builtin prototypes.  */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtzq_p8 (poly8x16_t __a)
{
  /* NOTE(review): poly8 lanes use the signed compare builtin — confirm
     a signed ordering is intended for poly.  */
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtzq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtzq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
}

/* Unsigned variants: cmgtu* against zero — equivalent to "lane != 0".  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtzq_u8 (uint8x16_t __a)
{
  uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtzq_u16 (uint16x8_t __a)
{
  uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_u32 (uint32x4_t __a)
{
  uint32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_u64 (uint64x2_t __a)
{
  uint64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}
   16757 
   16758 /* vcgtz - scalar.  */
   16759 
   16760 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   16761 vcgtzs_f32 (float32_t __a)
   16762 {
   16763   return __a > 0.0f ? -1 : 0;
   16764 }
   16765 
   16766 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16767 vcgtzd_s64 (int64x1_t __a)
   16768 {
   16769   return __a > 0 ? -1ll : 0ll;
   16770 }
   16771 
   16772 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16773 vcgtzd_u64 (int64x1_t __a)
   16774 {
   16775   return __a > 0 ? -1ll : 0ll;
   16776 }
   16777 
   16778 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   16779 vcgtzd_f64 (float64_t __a)
   16780 {
   16781   return __a > 0.0 ? -1ll : 0ll;
   16782 }
   16783 
/* vcle - vector compare less-than-or-equal.  There is no cmle builtin
   with both operands general here, so __a <= __b is computed as
   __b >= __a by swapping the operands into the cmge* builtins.  Each
   result lane is all-ones when the comparison holds, zero otherwise.
   The 64x1 variants are plain scalars and use the C conditional.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_p8 (poly8x8_t __a, poly8x8_t __b)
{
  /* NOTE(review): poly8 lanes use the signed compare builtin — confirm
     a signed ordering is intended for poly.  */
  return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
						 (int8x8_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
						 (int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
						  (int16x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
						  (int32x2_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}
   16855 
/* vcleq - 128-bit vector compare less-than-or-equal, again realised by
   swapping the operands into the cmge* builtins (__a <= __b computed as
   __b >= __a).  All-ones result lanes where the comparison holds.  */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  /* NOTE(review): poly8 lanes use the signed compare builtin — confirm
     a signed ordering is intended for poly.  */
  return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
						   (int8x16_t) __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
						   (int8x16_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
						  (int16x8_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
						  (int32x4_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
						  (int64x2_t) __a);
}
   16926 
   16927 /* vcle - scalar.  */
   16928 
   16929 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   16930 vcles_f32 (float32_t __a, float32_t __b)
   16931 {
   16932   return __a <= __b ? -1 : 0;
   16933 }
   16934 
   16935 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16936 vcled_s64 (int64x1_t __a, int64x1_t __b)
   16937 {
   16938   return __a <= __b ? -1ll : 0ll;
   16939 }
   16940 
   16941 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16942 vcled_u64 (uint64x1_t __a, uint64x1_t __b)
   16943 {
   16944   return __a <= __b ? -1ll : 0ll;
   16945 }
   16946 
   16947 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   16948 vcled_f64 (float64_t __a, float64_t __b)
   16949 {
   16950   return __a <= __b ? -1ll : 0ll;
   16951 }
   16952 
   16953 /* vclez - vector.  */
   16954 
   16955 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   16956 vclez_f32 (float32x2_t __a)
   16957 {
   16958   float32x2_t __b = {0.0f, 0.0f};
   16959   return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
   16960 }
   16961 
   16962 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16963 vclez_f64 (float64x1_t __a)
   16964 {
   16965   return __a <= 0.0 ? -1ll : 0ll;
   16966 }
   16967 
   16968 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   16969 vclez_p8 (poly8x8_t __a)
   16970 {
   16971   poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
   16972   return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
   16973 						 (int8x8_t) __b);
   16974 }
   16975 
   16976 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   16977 vclez_s8 (int8x8_t __a)
   16978 {
   16979   int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
   16980   return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
   16981 }
   16982 
   16983 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   16984 vclez_s16 (int16x4_t __a)
   16985 {
   16986   int16x4_t __b = {0, 0, 0, 0};
   16987   return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
   16988 }
   16989 
   16990 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   16991 vclez_s32 (int32x2_t __a)
   16992 {
   16993   int32x2_t __b = {0, 0};
   16994   return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
   16995 }
   16996 
   16997 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   16998 vclez_s64 (int64x1_t __a)
   16999 {
   17000   return __a <= 0ll ? -1ll : 0ll;
   17001 }
   17002 
   17003 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   17004 vclez_u64 (uint64x1_t __a)
   17005 {
   17006   return __a <= 0ll ? -1ll : 0ll;
   17007 }
   17008 
/* vclezq - 128-bit vector compare less-than-or-equal-to-zero, via the
   cmle* builtins against an explicit zero vector.  No unsigned variants
   appear in this group.  */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclezq_p8 (poly8x16_t __a)
{
  /* NOTE(review): poly8 lanes use the signed compare builtin — confirm
     a signed ordering is intended for poly.  */
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclezq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclezq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
}
   17060 
   17061 /* vclez - scalar.  */
   17062 
   17063 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   17064 vclezs_f32 (float32_t __a)
   17065 {
   17066   return __a <= 0.0f ? -1 : 0;
   17067 }
   17068 
   17069 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   17070 vclezd_s64 (int64x1_t __a)
   17071 {
   17072   return __a <= 0 ? -1ll : 0ll;
   17073 }
   17074 
   17075 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   17076 vclezd_u64 (int64x1_t __a)
   17077 {
   17078   return __a <= 0 ? -1ll : 0ll;
   17079 }
   17080 
   17081 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   17082 vclezd_f64 (float64_t __a)
   17083 {
   17084   return __a <= 0.0 ? -1ll : 0ll;
   17085 }
   17086 
/* vclt - vector compare less-than.  __a < __b is computed as
   __b > __a by swapping the operands into the cmgt* builtins.  Each
   result lane is all-ones when the comparison holds, zero otherwise.
   The 64x1 variants are plain scalars and use the C conditional.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_p8 (poly8x8_t __a, poly8x8_t __b)
{
  /* NOTE(review): poly8 lanes use the signed compare builtin — confirm
     a signed ordering is intended for poly.  */
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
						 (int8x8_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
						 (int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
						  (int16x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
						  (int32x2_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a < __b ? -1ll : 0ll;
}
   17158 
/* vcltq - 128-bit vector compare less-than, again realised by swapping
   the operands into the cmgt* builtins (__a < __b computed as
   __b > __a).  All-ones result lanes where the comparison holds.  */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  /* NOTE(review): poly8 lanes use the signed compare builtin — confirm
     a signed ordering is intended for poly.  */
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
						   (int8x16_t) __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
						   (int8x16_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
						  (int16x8_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
						  (int32x4_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
						  (int64x2_t) __a);
}
   17229 
   17230 /* vclt - scalar.  */
   17231 
   17232 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   17233 vclts_f32 (float32_t __a, float32_t __b)
   17234 {
   17235   return __a < __b ? -1 : 0;
   17236 }
   17237 
   17238 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   17239 vcltd_s64 (int64x1_t __a, int64x1_t __b)
   17240 {
   17241   return __a < __b ? -1ll : 0ll;
   17242 }
   17243 
   17244 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   17245 vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
   17246 {
   17247   return __a < __b ? -1ll : 0ll;
   17248 }
   17249 
   17250 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   17251 vcltd_f64 (float64_t __a, float64_t __b)
   17252 {
   17253   return __a < __b ? -1ll : 0ll;
   17254 }
   17255 
/* vcltz - vector compare less-than-zero, via the cmlt* builtins against
   an explicit zero vector.  All-ones result lanes where the lane < 0.
   No unsigned vector variants exist here (an unsigned value is never
   less than zero).  The 64x1 variants are plain scalars.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_f64 (float64x1_t __a)
{
  return __a < 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcltz_p8 (poly8x8_t __a)
{
  /* NOTE(review): poly8 lanes use the signed compare builtin — confirm
     a signed ordering is intended for poly.  */
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcltz_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcltz_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_s64 (int64x1_t __a)
{
  return __a < 0ll ? -1ll : 0ll;
}
   17305 
/* vcltzq - 128-bit vector compare less-than-zero, via the cmlt*
   builtins against an explicit zero vector.  */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltzq_p8 (poly8x16_t __a)
{
  /* NOTE(review): poly8 lanes use the signed compare builtin — confirm
     a signed ordering is intended for poly.  */
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltzq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltzq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
}
   17357 
   17358 /* vcltz - scalar.  */
   17359 
   17360 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   17361 vcltzs_f32 (float32_t __a)
   17362 {
   17363   return __a < 0.0f ? -1 : 0;
   17364 }
   17365 
   17366 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   17367 vcltzd_s64 (int64x1_t __a)
   17368 {
   17369   return __a < 0 ? -1ll : 0ll;
   17370 }
   17371 
   17372 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   17373 vcltzd_u64 (int64x1_t __a)
   17374 {
   17375   return __a < 0 ? -1ll : 0ll;
   17376 }
   17377 
   17378 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   17379 vcltzd_f64 (float64_t __a)
   17380 {
   17381   return __a < 0.0 ? -1ll : 0ll;
   17382 }
   17383 
/* vclz - count leading zero bits in each lane, via the clz* builtins.
   Unsigned variants reuse the signed builtins; the casts only satisfy
   the builtin prototypes and do not change the bit-level result.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t __a)
{
  return __builtin_aarch64_clzv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vclz_s16 (int16x4_t __a)
{
  return __builtin_aarch64_clzv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vclz_s32 (int32x2_t __a)
{
  return __builtin_aarch64_clzv2si (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclz_u8 (uint8x8_t __a)
{
  return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclz_u16 (uint16x4_t __a)
{
  return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclz_u32 (uint32x2_t __a)
{
  return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclzq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_clzv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclzq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_clzv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclzq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_clzv4si (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclzq_u8 (uint8x16_t __a)
{
  return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclzq_u16 (uint16x8_t __a)
{
  return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclzq_u32 (uint32x4_t __a)
{
  return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
}
   17457 
/* vcvt (double -> float): narrowing conversions.  */

/* Narrow both double lanes of __a to single precision.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_f64 (float64x2_t __a)
{
  return __builtin_aarch64_float_truncate_lo_v2sf (__a);
}

/* Narrow __b to single precision into the high half of the result;
   __a supplies the low half.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
}
   17471 
   17472 /* vcvt (float -> double).  */
   17473 
   17474 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
   17475 vcvt_f64_f32 (float32x2_t __a)
   17476 {
   17477 
   17478   return __builtin_aarch64_float_extend_lo_v2df (__a);
   17479 }
   17480 
/* Widen the high two float lanes of __a to double precision.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_high_f64_f32 (float32x4_t __a)
{
  return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
}
   17486 
   17487 /* vcvt  (<u>int -> float)  */
   17488 
   17489 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
   17490 vcvtd_f64_s64 (int64_t __a)
   17491 {
   17492   return (float64_t) __a;
   17493 }
   17494 
   17495 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
   17496 vcvtd_f64_u64 (uint64_t __a)
   17497 {
   17498   return (float64_t) __a;
   17499 }
   17500 
   17501 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
   17502 vcvts_f32_s32 (int32_t __a)
   17503 {
   17504   return (float32_t) __a;
   17505 }
   17506 
   17507 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
   17508 vcvts_f32_u32 (uint32_t __a)
   17509 {
   17510   return (float32_t) __a;
   17511 }
   17512 
/* Vector integer-to-floating-point conversions, lane-wise.  The
   unsigned variants cast to the signed vector type only because the
   "floatuns" builtins are declared with signed parameters.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_s32 (int32x2_t __a)
{
  return __builtin_aarch64_floatv2siv2sf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_u32 (uint32x2_t __a)
{
  return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_s32 (int32x4_t __a)
{
  return __builtin_aarch64_floatv4siv4sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_u32 (uint32x4_t __a)
{
  return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_s64 (int64x2_t __a)
{
  return __builtin_aarch64_floatv2div2df (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_u64 (uint64x2_t __a)
{
  return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
}
   17548 
   17549 /* vcvt (float -> <u>int)  */
   17550 
   17551 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
   17552 vcvtd_s64_f64 (float64_t __a)
   17553 {
   17554   return (int64_t) __a;
   17555 }
   17556 
   17557 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   17558 vcvtd_u64_f64 (float64_t __a)
   17559 {
   17560   return (uint64_t) __a;
   17561 }
   17562 
   17563 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
   17564 vcvts_s32_f32 (float32_t __a)
   17565 {
   17566   return (int32_t) __a;
   17567 }
   17568 
   17569 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   17570 vcvts_u32_f32 (float32_t __a)
   17571 {
   17572   return (uint32_t) __a;
   17573 }
   17574 
/* Vector floating-point-to-integer conversions, lane-wise, truncating
   toward zero (the "lbtrunc" builtins).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lbtruncv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvt_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lbtruncv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lbtruncv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
}
   17616 
/* vcvta - convert floating-point to integer, rounding to nearest with
   ties away from zero (the "lround" builtins).  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtad_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lrounddfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtad_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lroundudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtas_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtas_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundusfsi (__a);
}

/* Vector variants, lane-wise.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvta_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lroundv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvta_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtaq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lroundv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtaq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtaq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lroundv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtaq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
}
   17684 
/* vcvtm - convert floating-point to integer, rounding toward minus
   infinity (the "floor" builtins).  Note the signed scalar variants use
   the generic __builtin_llfloor/__builtin_ifloorf while the unsigned
   ones use aarch64-specific builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtmd_s64_f64 (float64_t __a)
{
  return __builtin_llfloor (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtmd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfloorudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtms_s32_f32 (float32_t __a)
{
  return __builtin_ifloorf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtms_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfloorusfsi (__a);
}

/* Vector variants, lane-wise.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtm_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfloorv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtm_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtmq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfloorv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtmq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtmq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfloorv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtmq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
}
   17752 
/* vcvtn - convert floating-point to integer, rounding to nearest
   (the "frintn" builtins).  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtnd_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintndfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtnd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintnudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtns_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtns_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnusfsi (__a);
}

/* Vector variants, lane-wise.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtn_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfrintnv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtn_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtnq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfrintnv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtnq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtnq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfrintnv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtnq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
}
   17820 
/* vcvtp - convert floating-point to integer, rounding toward plus
   infinity (the "ceil" builtins).  Note the signed scalar variants use
   the generic __builtin_llceil/__builtin_iceilf while the unsigned
   ones use aarch64-specific builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtpd_s64_f64 (float64_t __a)
{
  return __builtin_llceil (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtpd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lceiludfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtps_s32_f32 (float32_t __a)
{
  return __builtin_iceilf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtps_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lceilusfsi (__a);
}

/* Vector variants, lane-wise.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtp_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lceilv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtp_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtpq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lceilv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtpq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtpq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lceilv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtpq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
}
   17888 
   17889 /* vdup_n  */
   17890 
   17891 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
   17892 vdup_n_f32 (float32_t __a)
   17893 {
   17894   return (float32x2_t) {__a, __a};
   17895 }
   17896 
   17897 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
   17898 vdup_n_f64 (float64_t __a)
   17899 {
   17900   return __a;
   17901 }
   17902 
   17903 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   17904 vdup_n_p8 (poly8_t __a)
   17905 {
   17906   return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
   17907 }
   17908 
   17909 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
   17910 vdup_n_p16 (poly16_t __a)
   17911 {
   17912   return (poly16x4_t) {__a, __a, __a, __a};
   17913 }
   17914 
   17915 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   17916 vdup_n_s8 (int8_t __a)
   17917 {
   17918   return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
   17919 }
   17920 
   17921 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   17922 vdup_n_s16 (int16_t __a)
   17923 {
   17924   return (int16x4_t) {__a, __a, __a, __a};
   17925 }
   17926 
   17927 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   17928 vdup_n_s32 (int32_t __a)
   17929 {
   17930   return (int32x2_t) {__a, __a};
   17931 }
   17932 
   17933 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   17934 vdup_n_s64 (int64_t __a)
   17935 {
   17936   return __a;
   17937 }
   17938 
   17939 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   17940 vdup_n_u8 (uint8_t __a)
   17941 {
   17942   return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
   17943 }
   17944 
   17945 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   17946 vdup_n_u16 (uint16_t __a)
   17947 {
   17948   return (uint16x4_t) {__a, __a, __a, __a};
   17949 }
   17950 
   17951 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   17952 vdup_n_u32 (uint32_t __a)
   17953 {
   17954   return (uint32x2_t) {__a, __a};
   17955 }
   17956 
   17957 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   17958 vdup_n_u64 (uint64_t __a)
   17959 {
   17960   return __a;
   17961 }
   17962 
/* vdupq_n - broadcast a scalar into every lane of a 128-bit vector.
   NOTE(review): the 8- and 16-bit element variants take a 32-bit
   parameter (uint32_t/int32_t) rather than the element type; each lane
   receives the value narrowed by the implicit conversion in the
   initializer.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_n_f32 (float32_t __a)
{
  return (float32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_n_f64 (float64_t __a)
{
  return (float64x2_t) {__a, __a};
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_n_p8 (uint32_t __a)
{
  return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_n_p16 (uint32_t __a)
{
  return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_n_s8 (int32_t __a)
{
  return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		      __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_n_s16 (int32_t __a)
{
  return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_n_s32 (int32_t __a)
{
  return (int32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_n_s64 (int64_t __a)
{
  return (int64x2_t) {__a, __a};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_n_u8 (uint32_t __a)
{
  return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_n_u16 (uint32_t __a)
{
  return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_n_u32 (uint32_t __a)
{
  return (uint32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_n_u64 (uint64_t __a)
{
  return (uint64x2_t) {__a, __a};
}
   18039 
/* vdup_lane - broadcast lane __b of a 64-bit vector __a into every lane
   of the result.  Implemented via the __aarch64_vdup_lane_* helper
   macros; __b is expected to be a constant lane index (range checking
   presumably happens in those macros — TODO confirm).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_u64 (__a, __b);
}
   18113 
/* vdup_laneq - broadcast lane __b of a 128-bit vector __a into every
   lane of a 64-bit result, via the __aarch64_vdup_laneq_* helper
   macros; __b is expected to be a constant lane index.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u64 (__a, __b);
}
   18187 
/* vdupq_lane - broadcast lane __b of a 64-bit vector __a into every
   lane of a 128-bit result, via the __aarch64_vdupq_lane_* helper
   macros; __b is expected to be a constant lane index.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u64 (__a, __b);
}
   18260 
/* vdupq_laneq - broadcast lane __b of a 128-bit vector __a into every
   lane of a 128-bit result, via the __aarch64_vdupq_laneq_* helper
   macros; __b is expected to be a constant lane index.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u64 (__a, __b);
}
   18333 
/* vdupb_lane: read the byte in lane __b of the 64-bit vector __a as a
   scalar.  Despite the "dup" name these are scalar extracts, built on
   the vget_lane accessors.  __b must be a constant lane index.  */
__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vdupb_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_p8 (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vdupb_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_s8 (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vdupb_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_u8 (__a, __b);
}
   18352 
/* vduph_lane: read the 16-bit element in lane __b of the 64-bit vector
   __a as a scalar, via the vget_lane accessors.  */
__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vduph_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_p16 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vduph_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_s16 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vduph_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_u16 (__a, __b);
}
   18371 
/* vdups_lane: read the 32-bit element in lane __b of the 64-bit vector
   __a as a scalar, via the vget_lane accessors.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vdups_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_f32 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vdups_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_s32 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vdups_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_u32 (__a, __b);
}
   18390 
/* vdupd_lane: read the 64-bit element in lane __b of a one-element
   vector.  The x1 types are plain scalars (see the typedefs at the top
   of this file), so the only valid lane is 0 and __b is ignored.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b)
{
  return __a;
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b)
{
  return __a;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b)
{
  return __a;
}
   18409 
   18410 /* vdupb_laneq  */
   18411 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
   18412 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
   18413 {
   18414   return __aarch64_vgetq_lane_p8 (__a, __b);
   18415 }
   18416 
   18417 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
   18418 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
   18419 {
   18420   return __aarch64_vgetq_lane_s8 (__a, __b);
   18421 }
   18422 
   18423 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
   18424 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
   18425 {
   18426   return __aarch64_vgetq_lane_u8 (__a, __b);
   18427 }
   18428 
/* vduph_laneq: read the 16-bit element in lane __b of the 128-bit
   vector __a as a scalar, via the vgetq_lane accessors.  */
__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vduph_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_p16 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vduph_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s16 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vduph_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u16 (__a, __b);
}
   18447 
/* vdups_laneq: read the 32-bit element in lane __b of the 128-bit
   vector __a as a scalar, via the vgetq_lane accessors.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vdups_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_f32 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vdups_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s32 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vdups_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u32 (__a, __b);
}
   18466 
/* vdupd_laneq: read the 64-bit element in lane __b of the 128-bit
   vector __a as a scalar, via the vgetq_lane accessors.  Unlike the
   vdupd_lane variants above, __b matters here (two lanes exist).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vdupd_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_f64 (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vdupd_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s64 (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vdupd_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u64 (__a, __b);
}
   18485 
/* vfma_lane: fused multiply-accumulate against one lane of __c,
   i.e. result = __b * __c[__lane] + __a (operand order mirrors the
   scalar __builtin_fma (x, y, z) = x*y+z calls below).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (__b,
				    __aarch64_vdup_lane_f32 (__c, __lane),
				    __a);
}

/* The f64 arguments here are plain scalars (only lane 0 exists), so
   __lane is unused and this is just a scalar fma.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfma_lane_f64 (float64_t __a, float64_t __b,
	       float64_t __c, const int __lane)
{
  return __builtin_fma (__b, __c, __a);
}

/* Scalar variant; __lane unused for the same reason as above.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmad_lane_f64 (float64_t __a, float64_t __b,
	        float64_t __c, const int __lane)
{
  return __builtin_fma (__b, __c, __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmas_lane_f32 (float32_t __a, float32_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
}
   18517 
/* vfma_laneq: as vfma_lane, but the multiplier lane comes from a
   128-bit vector __c: result = __b * __c[__lane] + __a.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
	        float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (__b,
				    __aarch64_vdup_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfma_laneq_f64 (float64_t __a, float64_t __b,
	        float64x2_t __c, const int __lane)
{
  return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmad_laneq_f64 (float64_t __a, float64_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmas_laneq_f32 (float32_t __a, float32_t __b,
		 float32x4_t __c, const int __lane)
{
  return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
}
   18549 
/* vfmaq_lane: 128-bit fused multiply-accumulate against one lane of a
   64-bit __c: result = __b * __c[__lane] + __a.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (__b,
				    __aarch64_vdupq_lane_f32 (__c, __lane),
				    __a);
}

/* __c is a scalar (only lane 0 exists), so __lane is unused and __c is
   simply broadcast with vdupq_n_f64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
	        float64_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
}
   18567 
/* vfmaq_laneq: 128-bit fused multiply-accumulate against one lane of a
   128-bit __c: result = __b * __c[__lane] + __a.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
	         float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (__b,
				    __aarch64_vdupq_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (__b,
				    __aarch64_vdupq_laneq_f64 (__c, __lane),
				    __a);
}
   18587 
/* vfms_lane: fused multiply-subtract against one lane of __c,
   implemented as an fma with the first operand negated:
   result = -__b * __c[__lane] + __a = __a - __b * __c[__lane].  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (-__b,
				    __aarch64_vdup_lane_f32 (__c, __lane),
				    __a);
}

/* The f64 arguments here are plain scalars (only lane 0 exists), so
   __lane is unused.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfms_lane_f64 (float64_t __a, float64_t __b,
	       float64_t __c, const int __lane)
{
  return __builtin_fma (-__b, __c, __a);
}

/* Scalar variant; __lane unused for the same reason as above.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmsd_lane_f64 (float64_t __a, float64_t __b,
	        float64_t __c, const int __lane)
{
  return __builtin_fma (-__b, __c, __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmss_lane_f32 (float32_t __a, float32_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
}
   18619 
/* vfms_laneq: as vfms_lane, but the multiplier lane comes from a
   128-bit vector __c: result = __a - __b * __c[__lane].  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
	        float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (-__b,
				    __aarch64_vdup_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfms_laneq_f64 (float64_t __a, float64_t __b,
	        float64x2_t __c, const int __lane)
{
  return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmsd_laneq_f64 (float64_t __a, float64_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmss_laneq_f32 (float32_t __a, float32_t __b,
		 float32x4_t __c, const int __lane)
{
  return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
}
   18651 
/* vfmsq_lane: 128-bit fused multiply-subtract against one lane of a
   64-bit __c: result = __a - __b * __c[__lane].  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (-__b,
				    __aarch64_vdupq_lane_f32 (__c, __lane),
				    __a);
}

/* __c is a scalar (only lane 0 exists), so __lane is unused and __c is
   simply broadcast with vdupq_n_f64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
	        float64_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a);
}
   18669 
/* vfmsq_laneq: 128-bit fused multiply-subtract against one lane of a
   128-bit __c: result = __a - __b * __c[__lane].  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
	         float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (-__b,
				    __aarch64_vdupq_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (-__b,
				    __aarch64_vdupq_laneq_f64 (__c, __lane),
				    __a);
}
   18689 
/* vld1: load one 64-bit vector from memory.  Unsigned and poly
   variants cast the result of the signed ld1 builtin; the one-element
   64-bit types are plain scalars (see the typedefs at the top of this
   file), so those variants are a direct dereference.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t *a)
{
  return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
}

/* float64x1_t is just double, so this is a scalar load.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_f64 (const float64_t *a)
{
  return *a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_p8 (const poly8_t *a)
{
  return (poly8x8_t)
    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_p16 (const poly16_t *a)
{
  return (poly16x4_t)
    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_s8 (const int8_t *a)
{
  return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_s16 (const int16_t *a)
{
  return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_s32 (const int32_t *a)
{
  return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
}

/* int64x1_t is just int64_t, so this is a scalar load.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_s64 (const int64_t *a)
{
  return *a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_u8 (const uint8_t *a)
{
  return (uint8x8_t)
    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_u16 (const uint16_t *a)
{
  return (uint16x4_t)
    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_u32 (const uint32_t *a)
{
  return (uint32x2_t)
    __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
}

/* uint64x1_t is a one-element type, so this is a scalar load.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_u64 (const uint64_t *a)
{
  return *a;
}
   18768 
/* vld1q: load one 128-bit vector from memory.  As with vld1, unsigned
   and poly variants cast the result of the signed ld1 builtin.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t *a)
{
  return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_f64 (const float64_t *a)
{
  return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_p8 (const poly8_t *a)
{
  return (poly8x16_t)
    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_p16 (const poly16_t *a)
{
  return (poly16x8_t)
    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_s8 (const int8_t *a)
{
  return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_s16 (const int16_t *a)
{
  return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_s32 (const int32_t *a)
{
  return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_s64 (const int64_t *a)
{
  return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_u8 (const uint8_t *a)
{
  return (uint8x16_t)
    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_u16 (const uint16_t *a)
{
  return (uint16x8_t)
    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_u32 (const uint32_t *a)
{
  return (uint32x4_t)
    __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_u64 (const uint64_t *a)
{
  return (uint64x2_t)
    __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
}
   18848 
/* vldn */

/* vld2: structure load of two 64-bit vectors.  Each ld2 builtin fills
   an opaque register-tuple (__builtin_aarch64_simd_oi); the
   get_dregoi accessors then extract halves 0 and 1, cast back to the
   public element type where needed.  */

__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_s64 (const int64_t * __a)
{
  int64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
vld2_u64 (const uint64_t * __a)
{
  uint64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
vld2_f64 (const float64_t * __a)
{
  float64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
  return ret;
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vld2_s8 (const int8_t * __a)
{
  int8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vld2_p8 (const poly8_t * __a)
{
  poly8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vld2_s16 (const int16_t * __a)
{
  int16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vld2_p16 (const poly16_t * __a)
{
  poly16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vld2_s32 (const int32_t * __a)
{
  int32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}

__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vld2_u8 (const uint8_t * __a)
{
  uint8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vld2_u16 (const uint16_t * __a)
{
  uint16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vld2_u32 (const uint32_t * __a)
{
  uint32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}

__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
  float32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
  return ret;
}
   18982 
/* vld2q: structure load of two 128-bit vectors.  Same pattern as vld2
   above, but using the get_qregoi accessors to extract the two
   Q-register halves of the opaque tuple.  */
__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vld2q_s8 (const int8_t * __a)
{
  int8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vld2q_p8 (const poly8_t * __a)
{
  poly8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vld2q_s16 (const int16_t * __a)
{
  int16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vld2q_p16 (const poly16_t * __a)
{
  poly16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vld2q_s32 (const int32_t * __a)
{
  int32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}

__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
vld2q_s64 (const int64_t * __a)
{
  int64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}

__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vld2q_u8 (const uint8_t * __a)
{
  uint8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vld2q_u16 (const uint16_t * __a)
{
  uint16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vld2q_u32 (const uint32_t * __a)
{
  uint32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}

__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
vld2q_u64 (const uint64_t * __a)
{
  uint64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}

__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
  float32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
  return ret;
}

__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
vld2q_f64 (const float64_t * __a)
{
  float64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
  return ret;
}
   19114 
/* vld3_<type>: 3-element de-interleaving structure loads into three
   D registers (64-bit vectors; maps to the A64 LD3 instruction).
   The ld3 builtin loads 3*N consecutive elements from __a into an
   opaque register-triple value (CI mode); __builtin_aarch64_get_dregci*
   extracts vectors 0, 1 and 2.  Casts adapt the builtins' signed
   element types to the intrinsic's result type.  For the 64x1
   variants (s64/u64/f64) each "vector" is a single scalar lane.  */
__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_s64 (const int64_t * __a)
{
  int64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
vld3_u64 (const uint64_t * __a)
{
  uint64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
vld3_f64 (const float64_t * __a)
{
  float64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
  return ret;
}

__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
vld3_s8 (const int8_t * __a)
{
  int8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
vld3_p8 (const poly8_t * __a)
{
  poly8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
vld3_s16 (const int16_t * __a)
{
  int16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
vld3_p16 (const poly16_t * __a)
{
  poly16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
vld3_s32 (const int32_t * __a)
{
  int32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}

__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
vld3_u8 (const uint8_t * __a)
{
  uint8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
vld3_u16 (const uint16_t * __a)
{
  uint16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
vld3_u32 (const uint32_t * __a)
{
  uint32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}

__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
  float32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
  return ret;
}
   19258 
/* vld3q_<type>: 3-element de-interleaving structure loads into three
   Q registers (128-bit vectors; maps to the A64 LD3 instruction).
   Same pattern as vld3_* above, but the register-triple is unpacked
   with the Q-register accessors __builtin_aarch64_get_qregci*.  */
__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
vld3q_s8 (const int8_t * __a)
{
  int8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
vld3q_p8 (const poly8_t * __a)
{
  poly8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
vld3q_s16 (const int16_t * __a)
{
  int16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
vld3q_p16 (const poly16_t * __a)
{
  poly16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
vld3q_s32 (const int32_t * __a)
{
  int32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
vld3q_s64 (const int64_t * __a)
{
  int64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
vld3q_u8 (const uint8_t * __a)
{
  uint8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
vld3q_u16 (const uint16_t * __a)
{
  uint16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
vld3q_u32 (const uint32_t * __a)
{
  uint32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
vld3q_u64 (const uint64_t * __a)
{
  uint64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
  float32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
  return ret;
}

__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
vld3q_f64 (const float64_t * __a)
{
  float64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
  return ret;
}
   19402 
/* vld4_<type>: 4-element de-interleaving structure loads into four
   D registers (64-bit vectors; maps to the A64 LD4 instruction).
   The ld4 builtin loads 4*N consecutive elements from __a into an
   opaque register-quad value (XI mode); __builtin_aarch64_get_dregxi*
   extracts vectors 0..3.  Casts adapt the builtins' signed element
   types to the intrinsic's result type.  */
__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_s64 (const int64_t * __a)
{
  int64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
vld4_u64 (const uint64_t * __a)
{
  uint64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
vld4_f64 (const float64_t * __a)
{
  float64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
  ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
  return ret;
}

__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
vld4_s8 (const int8_t * __a)
{
  int8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
vld4_p8 (const poly8_t * __a)
{
  poly8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
vld4_s16 (const int16_t * __a)
{
  int16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
vld4_p16 (const poly16_t * __a)
{
  poly16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
vld4_s32 (const int32_t * __a)
{
  int32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
vld4_u8 (const uint8_t * __a)
{
  uint8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
vld4_u16 (const uint16_t * __a)
{
  uint16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
vld4_u32 (const uint32_t * __a)
{
  uint32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
  float32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
  ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
  return ret;
}
   19558 
/* vld4q_<type>: 4-element de-interleaving structure loads into four
   Q registers (128-bit vectors; maps to the A64 LD4 instruction).
   Same pattern as vld4_* above, but the register-quad is unpacked
   with the Q-register accessors __builtin_aarch64_get_qregxi*.  */
__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
vld4q_s8 (const int8_t * __a)
{
  int8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
vld4q_p8 (const poly8_t * __a)
{
  poly8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
vld4q_s16 (const int16_t * __a)
{
  int16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
vld4q_p16 (const poly16_t * __a)
{
  poly16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
vld4q_s32 (const int32_t * __a)
{
  int32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
vld4q_s64 (const int64_t * __a)
{
  int64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
vld4q_u8 (const uint8_t * __a)
{
  uint8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
vld4q_u16 (const uint16_t * __a)
{
  uint16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
vld4q_u32 (const uint32_t * __a)
{
  uint32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
vld4q_u64 (const uint64_t * __a)
{
  uint64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
  float32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
  ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
  return ret;
}

__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
vld4q_f64 (const float64_t * __a)
{
  float64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
  ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
  return ret;
}
   19714 
   19715 /* vmax */
   19716 
/* vmax_<type>: element-wise maximum on 64-bit (D-register) vectors.
   The float variant uses the smax_nan builtin, i.e. the FMAX form of
   maximum (NOTE(review): the _nan suffix indicates the non-IEEE,
   NaN-propagating semantics of the A64 FMAX instruction rather than
   FMAXNM -- confirm against the builtin's definition).  The unsigned
   variants cast through the signed vector types because the umax
   builtins are declared with signed arguments/results only; the casts
   are bit-preserving and do not change the unsigned comparison.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmax_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smax_nanv2sf (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmax_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_smaxv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmax_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_smaxv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmax_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_smaxv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmax_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmax_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmax_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}
   19761 
   19762 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
   19763 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
   19764 {
   19765   return __builtin_aarch64_smax_nanv4sf (__a, __b);
   19766 }
   19767 
   19768 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
   19769 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
   19770 {
   19771   return __builtin_aarch64_smax_nanv2df (__a, __b);
   19772 }
   19773 
   19774 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
   19775 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
   19776 {
   19777   return __builtin_aarch64_smaxv16qi (__a, __b);
   19778 }
   19779 
   19780 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   19781 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
   19782 {
   19783   return __builtin_aarch64_smaxv8hi (__a, __b);
   19784 }
   19785 
   19786 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   19787 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
   19788 {
   19789   return __builtin_aarch64_smaxv4si (__a, __b);
   19790 }
   19791 
   19792 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   19793 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
   19794 {
   19795   return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
   19796 						   (int8x16_t) __b);
   19797 }
   19798 
   19799 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   19800 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
   19801 {
   19802   return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
   19803 						  (int16x8_t) __b);
   19804 }
   19805 
   19806 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   19807 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
   19808 {
   19809   return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
   19810 						  (int32x4_t) __b);
   19811 }
   19812 
/* vmaxnm  */

/* Element-wise maxNum: per ACLE, when exactly one operand of a lane is
   a NaN the numeric operand is returned (FMAXNM semantics -- TODO
   confirm against the builtin's definition).  Note these use the plain
   "smax" float builtins, while vmax above uses the "_nan" ones.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smaxv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smaxv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smaxv2df (__a, __b);
}
   19832 
/* vmaxv  */

/* Across-lanes maximum: reduce one whole vector to a scalar.  Each
   wrapper calls a reduc_* builtin and then reads lane 0 of the result
   with vget(q)_lane_* (only lane 0 is read here; the contents of the
   other result lanes are not relied upon).  Float forms use the
   NaN-propagating "smax_nan" reduction; unsigned forms go through the
   signed vector types, as in vmax above, purely to match the builtin
   signatures.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxv_f32 (float32x2_t __a)
{
  return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
			0);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxv_s8 (int8x8_t __a)
{
  return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxv_s16 (int16x4_t __a)
{
  return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxv_s32 (int32x2_t __a)
{
  return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxv_u8 (uint8x8_t __a)
{
  return vget_lane_u8 ((uint8x8_t)
		__builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
		0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxv_u16 (uint16x4_t __a)
{
  return vget_lane_u16 ((uint16x4_t)
		__builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
		0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxv_u32 (uint32x2_t __a)
{
  return vget_lane_u32 ((uint32x2_t)
		__builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
		0);
}

/* Full-width (128-bit source) variants.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxvq_f32 (float32x4_t __a)
{
  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
			 0);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxvq_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
			 0);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxvq_s8 (int8x16_t __a)
{
  return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxvq_s16 (int16x8_t __a)
{
  return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxvq_s32 (int32x4_t __a)
{
  return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxvq_u8 (uint8x16_t __a)
{
  return vgetq_lane_u8 ((uint8x16_t)
		__builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
		0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxvq_u16 (uint16x8_t __a)
{
  return vgetq_lane_u16 ((uint16x8_t)
		__builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
		0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxvq_u32 (uint32x4_t __a)
{
  return vgetq_lane_u32 ((uint32x4_t)
		__builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
		0);
}
   19939 
/* vmaxnmv  */

/* Across-lanes maxNum reduction: like vmaxv but with the
   NaN-ignoring (non-"_nan") float reduction builtins.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmv_f32 (float32x2_t __a)
{
  return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
			0);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmvq_f32 (float32x4_t __a)
{
  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxnmvq_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
}
   19960 
/* vmin  */

/* Element-wise minimum; exact mirror of the vmax family above.
   Float forms use the NaN-propagating "smin_nan" builtins (contrast
   vminnm below); unsigned forms cast through the signed vector types
   to match the umin builtin signatures.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmin_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smin_nanv2sf (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmin_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sminv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmin_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sminv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmin_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sminv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmin_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmin_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmin_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

/* Full-width (128-bit) variants.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smin_nanv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smin_nanv2df (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vminq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sminv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vminq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sminv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vminq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sminv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vminq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vminq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vminq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}
   20058 
/* vminnm  */

/* Element-wise minNum: per ACLE, when exactly one operand of a lane is
   a NaN the numeric operand is returned (FMINNM semantics -- TODO
   confirm against the builtin's definition).  Uses the plain "smin"
   float builtins, unlike vmin's "_nan" forms.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vminnm_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_sminv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminnmq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_sminv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminnmq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_sminv2df (__a, __b);
}
   20078 
/* vminv  */

/* Across-lanes minimum: reduce one whole vector to a scalar.  Same
   pattern as vmaxv above -- reduc_* builtin, then extract lane 0 of
   the result with vget(q)_lane_*.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminv_f32 (float32x2_t __a)
{
  return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
			0);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminv_s8 (int8x8_t __a)
{
  return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
		       0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminv_s16 (int16x4_t __a)
{
  return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminv_s32 (int32x2_t __a)
{
  return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vminv_u8 (uint8x8_t __a)
{
  return vget_lane_u8 ((uint8x8_t)
		__builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
		0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vminv_u16 (uint16x4_t __a)
{
  return vget_lane_u16 ((uint16x4_t)
		__builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
		0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vminv_u32 (uint32x2_t __a)
{
  return vget_lane_u32 ((uint32x2_t)
		__builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
		0);
}

/* Full-width (128-bit source) variants.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminvq_f32 (float32x4_t __a)
{
  return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
			 0);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminvq_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
			 0);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminvq_s8 (int8x16_t __a)
{
  return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminvq_s16 (int16x8_t __a)
{
  return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminvq_s32 (int32x4_t __a)
{
  return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vminvq_u8 (uint8x16_t __a)
{
  return vgetq_lane_u8 ((uint8x16_t)
		__builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
		0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vminvq_u16 (uint16x8_t __a)
{
  return vgetq_lane_u16 ((uint16x8_t)
		__builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
		0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vminvq_u32 (uint32x4_t __a)
{
  return vgetq_lane_u32 ((uint32x4_t)
		__builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
		0);
}
   20186 
/* vminnmv  */

/* Across-lanes minNum reduction: like vminv but with the
   NaN-ignoring (non-"_nan") float reduction builtins.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmv_f32 (float32x2_t __a)
{
  return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmvq_f32 (float32x4_t __a)
{
  return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminnmvq_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
}
   20206 
   20207 /* vmla */
   20208 
   20209 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
   20210 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
   20211 {
   20212   return a + b * c;
   20213 }
   20214 
   20215 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
   20216 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
   20217 {
   20218   return a + b * c;
   20219 }
   20220 
   20221 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
   20222 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
   20223 {
   20224   return a + b * c;
   20225 }
   20226 
/* vmla_lane  */

/* Multiply-accumulate by a single lane of __c:
   __a + __b * __c[__lane].  __lane is intended to be a compile-time
   lane index (range checking, if any, happens inside the
   __aarch64_vget_lane_* helpers -- TODO confirm).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
		int16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
		int32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
		uint16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
	       uint32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
}
   20263 
/* vmla_laneq  */

/* Multiply-accumulate by a single lane of the 128-bit vector __c:
   __a + __b * __c[__lane] (the "q" in laneq refers to __c's width;
   the accumulator and result stay 64-bit).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
	        float32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
		int16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
		int32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
		uint16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
		uint32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
}
   20300 
/* vmlaq_lane  */

/* 128-bit multiply-accumulate by a single lane of the 64-bit vector
   __c: __a + __b * __c[__lane].  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
		float32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
		int16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
		int32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
		uint16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
		uint32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
}
   20337 
/* vmlaq_laneq  */

/* 128-bit multiply-accumulate by a single lane of the 128-bit vector
   __c: __a + __b * __c[__lane].  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
		 float32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
		int16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
		int32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
		uint16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
		uint32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
}
   20374 
   20375 /* vmls  */
   20376 
   20377 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
   20378 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
   20379 {
   20380   return a - b * c;
   20381 }
   20382 
   20383 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
   20384 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
   20385 {
   20386   return a - b * c;
   20387 }
   20388 
   20389 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
   20390 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
   20391 {
   20392   return a - b * c;
   20393 }
   20394 
/* vmls_lane  */

/* Multiply-subtract by a single lane of __c:
   __a - (__b * __c[__lane]).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
		int16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
		int32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
		uint16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
	       uint32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
}
   20431 
/* vmls_laneq  */

/* Multiply-subtract by a single lane of the 128-bit vector __c:
   __a - (__b * __c[__lane]).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
	       float32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
		int16x8_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
		int32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
		uint16x8_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
		uint32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
}
   20468 
/* vmlsq_lane  */

/* 128-bit multiply-subtract by a single lane of the 64-bit vector
   __c: __a - (__b * __c[__lane]).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
		float32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
		int16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
		int32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
		uint16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
		uint32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
}
   20505 
   20506   /* vmlsq_laneq  */
   20507 
   20508 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
   20509 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
   20510 		float32x4_t __c, const int __lane)
   20511 {
   20512   return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
   20513 }
   20514 
   20515 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   20516 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
   20517 		int16x8_t __c, const int __lane)
   20518 {
   20519   return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
   20520 }
   20521 
   20522 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   20523 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
   20524 		int32x4_t __c, const int __lane)
   20525 {
   20526   return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
   20527 }
   20528 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   20529 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
   20530 		uint16x8_t __c, const int __lane)
   20531 {
   20532   return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
   20533 }
   20534 
   20535 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   20536 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
   20537 		uint32x4_t __c, const int __lane)
   20538 {
   20539   return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
   20540 }
   20541 
   20542 /* vmov_n_  */
   20543 
   20544 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
   20545 vmov_n_f32 (float32_t __a)
   20546 {
   20547   return vdup_n_f32 (__a);
   20548 }
   20549 
   20550 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
   20551 vmov_n_f64 (float64_t __a)
   20552 {
   20553   return __a;
   20554 }
   20555 
   20556 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   20557 vmov_n_p8 (poly8_t __a)
   20558 {
   20559   return vdup_n_p8 (__a);
   20560 }
   20561 
   20562 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
   20563 vmov_n_p16 (poly16_t __a)
   20564 {
   20565   return vdup_n_p16 (__a);
   20566 }
   20567 
   20568 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   20569 vmov_n_s8 (int8_t __a)
   20570 {
   20571   return vdup_n_s8 (__a);
   20572 }
   20573 
   20574 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   20575 vmov_n_s16 (int16_t __a)
   20576 {
   20577   return vdup_n_s16 (__a);
   20578 }
   20579 
   20580 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   20581 vmov_n_s32 (int32_t __a)
   20582 {
   20583   return vdup_n_s32 (__a);
   20584 }
   20585 
   20586 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   20587 vmov_n_s64 (int64_t __a)
   20588 {
   20589   return __a;
   20590 }
   20591 
   20592 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   20593 vmov_n_u8 (uint8_t __a)
   20594 {
   20595   return vdup_n_u8 (__a);
   20596 }
   20597 
   20598 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   20599 vmov_n_u16 (uint16_t __a)
   20600 {
   20601     return vdup_n_u16 (__a);
   20602 }
   20603 
   20604 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   20605 vmov_n_u32 (uint32_t __a)
   20606 {
   20607    return vdup_n_u32 (__a);
   20608 }
   20609 
   20610 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   20611 vmov_n_u64 (uint64_t __a)
   20612 {
   20613    return __a;
   20614 }
   20615 
/* vmovq_n_<type>: duplicate the scalar __a into every lane of the
   corresponding 128-bit (quad) vector, delegating to the matching
   vdupq_n_<type> intrinsic.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmovq_n_f32 (float32_t __a)
{
  return vdupq_n_f32 (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmovq_n_f64 (float64_t __a)
{
  return vdupq_n_f64 (__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmovq_n_p8 (poly8_t __a)
{
  return vdupq_n_p8 (__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmovq_n_p16 (poly16_t __a)
{
  return vdupq_n_p16 (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovq_n_s8 (int8_t __a)
{
  return vdupq_n_s8 (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovq_n_s16 (int16_t __a)
{
  return vdupq_n_s16 (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovq_n_s32 (int32_t __a)
{
  return vdupq_n_s32 (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovq_n_s64 (int64_t __a)
{
  return vdupq_n_s64 (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovq_n_u8 (uint8_t __a)
{
  return vdupq_n_u8 (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovq_n_u16 (uint16_t __a)
{
  return vdupq_n_u16 (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovq_n_u32 (uint32_t __a)
{
  return vdupq_n_u32 (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovq_n_u64 (uint64_t __a)
{
  return vdupq_n_u64 (__a);
}
   20687 
/* vmul_lane: multiply every lane of __a by lane __lane extracted from
   the 64-bit vector __b (via the __aarch64_vget_lane_* helpers).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
{
  /* float64x1_t has a single lane, so __lane is unused and the
     multiply is a plain scalar one.  */
  return __a * __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u32 (__b, __lane);
}
   20725 
/* vmul_laneq: multiply every lane of the 64-bit vector __a by lane
   __lane extracted from the 128-bit (q) vector __b (via the
   __aarch64_vgetq_lane_* helpers).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
}
   20763 
/* vmulq_lane: multiply every lane of the 128-bit vector __a by lane
   __lane extracted from the 64-bit vector __b.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
{
  /* __b is a one-lane vector (a plain double), so the multiply uses
     it directly and __lane is unused.  */
  return __a * __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u32 (__b, __lane);
}
   20801 
/* vmulq_laneq: multiply every lane of the 128-bit vector __a by lane
   __lane extracted from the 128-bit vector __b.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
}
   20839 
/* vneg/vnegq: lane-wise negation, expressed with the GCC vector
   extension's unary minus so the compiler selects the NEG/FNEG
   instructions itself.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vneg_f32 (float32x2_t __a)
{
  return -__a;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vneg_f64 (float64x1_t __a)
{
  return -__a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vneg_s8 (int8x8_t __a)
{
  return -__a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vneg_s16 (int16x4_t __a)
{
  return -__a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vneg_s32 (int32x2_t __a)
{
  return -__a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vneg_s64 (int64x1_t __a)
{
  return -__a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vnegq_f32 (float32x4_t __a)
{
  return -__a;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vnegq_f64 (float64x2_t __a)
{
  return -__a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vnegq_s8 (int8x16_t __a)
{
  return -__a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vnegq_s16 (int16x8_t __a)
{
  return -__a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vnegq_s32 (int32x4_t __a)
{
  return -__a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vnegq_s64 (int64x2_t __a)
{
  return -__a;
}
   20913 
/* vqabs: saturating absolute value, mapping straight onto the
   __builtin_aarch64_sqabs* builtins (vector v2di form plus the scalar
   qi/hi/si forms; other vector widths are defined elsewhere in this
   file).  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqabsq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqabsb_s8 (int8_t __a)
{
  return (int8_t) __builtin_aarch64_sqabsqi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqabsh_s16 (int16_t __a)
{
  return (int16_t) __builtin_aarch64_sqabshi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqabss_s32 (int32_t __a)
{
  return (int32_t) __builtin_aarch64_sqabssi (__a);
}
   20939 
/* vqadd: scalar saturating addition, signed (sqadd*) and unsigned
   (uqadd*) builtins; the cast narrows the builtin result back to the
   intrinsic's declared return type.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqaddb_s8 (int8_t __a, int8_t __b)
{
  return (int8_t) __builtin_aarch64_sqaddqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqaddh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqaddhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqadds_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqaddsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqaddd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqaddb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqaddqi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqaddh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqaddhi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqadds_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqaddsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
}
   20989 
/* vqdmlal: signed saturating doubling multiply-accumulate long.
   Each wrapper forwards to the matching __builtin_aarch64_sqdmlal*
   builtin: the "2" builtins back the _high variants, _lane/_laneq
   take the lane index __d, and _n multiplies by a scalar.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
}

/* Scalar forms (h = 16-bit inputs, s = 32-bit inputs).  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlals_s32 (int64x1_t __a, int32_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlals_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
}
   21115 
/* vqdmlsl: signed saturating doubling multiply-subtract long.
   Structured exactly like the vqdmlal group above, but forwarding to
   the __builtin_aarch64_sqdmlsl* builtins.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
}

/* Scalar forms (h = 16-bit inputs, s = 32-bit inputs).  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64x1_t __a, int32_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlsls_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}
   21241 
/* vqdmulh: signed saturating doubling multiply returning high half;
   forwards to the __builtin_aarch64_sqdmulh* builtins (vector _lane
   forms plus scalar h/s forms).  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}
   21291 
/* vqdmull: signed saturating doubling multiply long, forwarding to the
   __builtin_aarch64_sqdmull* builtins ("2" builtins back the _high
   variants).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqdmullv4hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqdmull2v8hi (__a, __b);
}
   21305 
   21306 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   21307 vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
   21308 {
   21309   return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
   21310 }
   21311 
   21312 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   21313 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
   21314 {
   21315   return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
   21316 }
   21317 
/* Remaining vqdmull variants: _n forms multiply by a scalar,
   _lane/_laneq take the lane index __c, and the trailing h/s forms are
   the scalar intrinsics.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_n_s16 (int16x4_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqdmullv2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqdmull2v4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_n_s32 (int32x2_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull_nv2si (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_s16 (int16_t __a, int16_t __b)
{
  return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_s32 (int32_t __a, int32_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}
   21413 
/* vqmovn: saturating narrow, via the __builtin_aarch64_sqmovn*
   (signed) and __builtin_aarch64_uqmovn* (unsigned) builtins.  The
   unsigned vector forms cast their argument to the signed vector type
   the builtin expects, then cast the result back.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqmovn_s16 (int16x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqmovn_s32 (int32x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqmovn_s64 (int64x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovn_u16 (uint16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovn_u32 (uint32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovn_u64 (uint64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
}

/* Scalar saturating-narrow forms.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqmovnh_s16 (int16_t __a)
{
  return (int8_t) __builtin_aarch64_sqmovnhi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqmovns_s32 (int32_t __a)
{
  return (int16_t) __builtin_aarch64_sqmovnsi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovnd_s64 (int64x1_t __a)
{
  return (int32_t) __builtin_aarch64_sqmovndi (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqmovnh_u16 (uint16_t __a)
{
  return (uint8_t) __builtin_aarch64_uqmovnhi (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqmovns_u32 (uint32_t __a)
{
  return (uint16_t) __builtin_aarch64_uqmovnsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqmovnd_u64 (uint64x1_t __a)
{
  return (uint32_t) __builtin_aarch64_uqmovndi (__a);
}
   21487 
   21488 /* vqmovun */
   21489 
/* SQXTUN: signed saturating extract-narrow to UNSIGNED.  Negative
   lanes clamp to 0, large lanes to the unsigned maximum.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovun_s16 (int16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovun_s32 (int32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovun_s64 (int64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
}

/* Scalar SQXTUN.  NOTE(review): these are declared to return signed
   types, while the ACLE specifies unsigned results (uint8_t etc.) --
   the bit pattern is the saturated unsigned value either way; confirm
   against the current ACLE before changing the signatures.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqmovunh_s16 (int16_t __a)
{
  return (int8_t) __builtin_aarch64_sqmovunhi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqmovuns_s32 (int32_t __a)
{
  return (int16_t) __builtin_aarch64_sqmovunsi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovund_s64 (int64x1_t __a)
{
  return (int32_t) __builtin_aarch64_sqmovundi (__a);
}
   21525 
   21526 /* vqneg */
   21527 
/* SQNEG: saturating negate.  Unlike plain negation, the most negative
   value saturates to the most positive instead of overflowing.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqnegq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
}

/* Scalar SQNEG forms.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqnegb_s8 (int8_t __a)
{
  return (int8_t) __builtin_aarch64_sqnegqi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqnegh_s16 (int16_t __a)
{
  return (int16_t) __builtin_aarch64_sqneghi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqnegs_s32 (int32_t __a)
{
  return (int32_t) __builtin_aarch64_sqnegsi (__a);
}
   21551 
   21552 /* vqrdmulh */
   21553 
/* SQRDMULH (by lane): saturating rounding doubling multiply returning
   the high half.  __c selects a constant lane of __b.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return  __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
}

/* Scalar SQRDMULH forms.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}
   21601 
   21602 /* vqrshl */
   21603 
/* SQRSHL: signed saturating rounding shift left.  Each lane of __a is
   shifted by the corresponding signed lane of __b; a negative shift
   count is a rounding right shift.  Results saturate.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqrshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqrshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqrshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

/* UQRSHL: unsigned data, signed per-lane shift counts.  Casts bridge
   the builtin's signed modes; bits are unchanged.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
}

/* 128-bit (quad) forms.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqrshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqrshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqrshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqrshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
}

/* Scalar forms.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshlb_s8 (int8_t __a, int8_t __b)
{
  return __builtin_aarch64_sqrshlqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshlh_s16 (int16_t __a, int16_t __b)
{
  return __builtin_aarch64_sqrshlhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqrshlsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

/* NOTE(review): the scalar unsigned forms take an UNSIGNED shift
   operand, unlike the vector forms above (signed __b) -- confirm
   against the ACLE, which specifies a signed shift count.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshlb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqrshlqi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshlh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqrshlhi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshls_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqrshlsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
}
   21747 
   21748 /* vqrshrn */
   21749 
/* SQRSHRN: signed saturating rounded shift right narrow by immediate
   __b, producing lanes of half the width.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
}

/* UQRSHRN: unsigned variants; input casts bridge the builtin's signed
   vector modes, bits unchanged.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
}

/* Scalar forms.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshrnh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshrns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrnd_n_s64 (int64x1_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshrnh_n_u16 (uint16_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshrns_n_u32 (uint32_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
}
   21821 
   21822 /* vqrshrun */
   21823 
/* SQRSHRUN: signed saturating rounded shift right narrow by immediate
   __b with UNSIGNED saturation (negative results clamp to 0).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
}

/* Scalar SQRSHRUN.  NOTE(review): declared with signed return types,
   while the ACLE specifies unsigned results; same bit pattern either
   way -- confirm before changing the signatures.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshrunh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshruns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrund_n_s64 (int64x1_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
}
   21859 
   21860 /* vqshl */
   21861 
/* SQSHL (register): signed saturating shift left.  Each lane of __a is
   shifted by the corresponding signed lane of __b (negative count =
   right shift); results saturate.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

/* UQSHL (register): unsigned data, signed shift counts.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
}

/* 128-bit (quad) forms.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
}

/* Scalar forms.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlb_s8 (int8_t __a, int8_t __b)
{
  return __builtin_aarch64_sqshlqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshlh_s16 (int16_t __a, int16_t __b)
{
  return __builtin_aarch64_sqshlhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqshlsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

/* NOTE(review): scalar unsigned forms take an unsigned shift operand,
   unlike the vector forms (signed __b); confirm against the ACLE.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqshlqi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshlh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqshlhi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshls_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqshlsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
}
   22005 
/* SQSHL (immediate): signed saturating shift left of every lane by the
   constant __b.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
}

/* UQSHL (immediate), unsigned variants.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
}

/* 128-bit (quad) immediate forms.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
}

/* Scalar immediate forms.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlb_n_s8 (int8_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshlh_n_s16 (int16_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshls_n_s32 (int32_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshld_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlb_n_u8 (uint8_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_uqshl_nqi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshlh_n_u16 (uint16_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_uqshl_nhi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshls_n_u32 (uint32_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_uqshl_nsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshld_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
}
   22149 
   22150 /* vqshlu */
   22151 
/* SQSHLU: signed saturating shift left by immediate __b with UNSIGNED
   saturation (negative lanes clamp to 0).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshlu_n_s8 (int8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshlu_n_s16 (int16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshlu_n_s32 (int32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshlu_n_s64 (int64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
}

/* 128-bit (quad) forms.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshluq_n_s8 (int8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshluq_n_s16 (int16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshluq_n_s32 (int32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshluq_n_s64 (int64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
}

/* Scalar SQSHLU.  NOTE(review): declared with signed return types,
   while the ACLE specifies unsigned results; same bit pattern either
   way -- confirm before changing the signatures.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlub_n_s8 (int8_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshluh_n_s16 (int16_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshlus_n_s32 (int32_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshlud_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
}
   22223 
   22224 /* vqshrn */
   22225 
/* SQSHRN: signed saturating shift right narrow by immediate __b,
   producing lanes of half the width (truncating, not rounding --
   compare vqrshrn above).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
}

/* UQSHRN: unsigned variants.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
}

/* Scalar forms.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshrnh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshrns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshrnd_n_s64 (int64x1_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshrnh_n_u16 (uint16_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshrns_n_u32 (uint32_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshrnd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
}
   22297 
/* vqshrun */
/* Saturating shift right narrow by immediate, signed input / unsigned
   result (SQSHRUN forms); wraps __builtin_aarch64_sqshrun_n*.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
}

/* Scalar forms.  NOTE(review): unlike the vector forms above, these
   return *signed* scalar types even though the operation produces an
   unsigned result -- the bit pattern is the same, but the signatures
   are inconsistent with the vector variants; worth confirming against
   the ACLE specification before relying on the sign.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshrunh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshruns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshrund_n_s64 (int64x1_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
}
   22335 
/* vqsub */
/* Scalar saturating subtract (SQSUB/UQSUB forms): __a - __b with the
   result saturated to the range of the element type, via the
   __builtin_aarch64_[su]qsub* builtins.  The _s64/_u64 variants use
   int64x1_t/uint64x1_t, which this header typedefs to plain 64-bit
   integers.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqsubb_s8 (int8_t __a, int8_t __b)
{
  return (int8_t) __builtin_aarch64_sqsubqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqsubh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqsubhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqsubs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqsubsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqsubd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqsubb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqsubqi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqsubh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqsubhi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqsubs_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqsubsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
}
   22385 
/* vrecpe  */
/* Floating-point reciprocal estimate (FRECPE): scalar f32/f64 forms
   and vector forms over 2xf32, 4xf32 and 2xf64, each forwarding to
   the matching __builtin_aarch64_frecpe* builtin.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpes_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpesf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecped_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpedf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecpe_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frecpev2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frecpev4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpeq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frecpev2df (__a);
}
   22417 
/* vrecps  */
/* Floating-point reciprocal step (FRECPS), the Newton-Raphson
   refinement companion to vrecpe: scalar and vector forms wrapping
   __builtin_aarch64_frecps*.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpss_f32 (float32_t __a, float32_t __b)
{
  return __builtin_aarch64_frecpssf (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpsd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_aarch64_frecpsdf (__a, __b);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecps_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_frecpsv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_frecpsv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_frecpsv2df (__a, __b);
}
   22449 
/* vrecpx  */
/* Floating-point reciprocal exponent (FRECPX), scalar only; wraps
   __builtin_aarch64_frecpx{sf,df}.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpxs_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpxsf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpxd_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpxdf (__a);
}
   22463 
/* vrnd  */
/* Round toward zero (truncate): implemented with GCC's btrunc
   builtins, i.e. the semantics of C trunc() applied per lane.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnd_f32 (float32x2_t __a)
{
  return __builtin_aarch64_btruncv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_btruncv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_btruncv2df (__a);
}
   22483 
/* vrnda  */
/* Round to nearest with ties away from zero: implemented with GCC's
   round builtins, i.e. the semantics of C round() per lane.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnda_f32 (float32x2_t __a)
{
  return __builtin_aarch64_roundv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndaq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_roundv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndaq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_roundv2df (__a);
}
   22503 
/* vrndi  */
/* Round using the current rounding mode without raising the inexact
   exception: implemented with GCC's nearbyint builtins (semantics of
   C nearbyint() per lane).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndi_f32 (float32x2_t __a)
{
  return __builtin_aarch64_nearbyintv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndiq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_nearbyintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndiq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_nearbyintv2df (__a);
}
   22523 
/* vrndm  */
/* Round toward minus infinity: GCC floor builtins per lane.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndm_f32 (float32x2_t __a)
{
  return __builtin_aarch64_floorv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndmq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_floorv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndmq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_floorv2df (__a);
}
   22543 
/* vrndn  */
/* Round to nearest with ties to even: wraps the frintn builtins
   (FRINTN instruction) per lane.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndn_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frintnv2sf (__a);
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndnq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frintnv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndnq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frintnv2df (__a);
}
   22562 
/* vrndp  */
/* Round toward plus infinity: GCC ceil builtins per lane.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndp_f32 (float32x2_t __a)
{
  return __builtin_aarch64_ceilv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndpq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_ceilv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndpq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_ceilv2df (__a);
}
   22582 
/* vrndx  */
/* Round using the current rounding mode, raising inexact where
   applicable: GCC rint builtins per lane (contrast vrndi above,
   which uses nearbyint and suppresses inexact).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndx_f32 (float32x2_t __a)
{
  return __builtin_aarch64_rintv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndxq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_rintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndxq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_rintv2df (__a);
}
   22602 
/* vrshl */
/* Rounding shift left by a per-lane *signed* shift-count vector
   (SRSHL/URSHL forms), via __builtin_aarch64_[su]rshl*.  The unsigned
   data operand is cast through the signed vector type because the
   builtins are declared on signed modes; the cast is a bit-pattern
   reinterpretation only.  Note that the count operand __b is signed
   even for the _u variants.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
}

/* Scalar (DI-mode) forms; int64x1_t/uint64x1_t are plain 64-bit
   integer typedefs here.  NOTE(review): vrshld_u64 takes an
   *unsigned* count where vrshl_u64 above takes a signed one --
   an inconsistency inherited from this header's signatures.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
}
   22712 
/* vrshr */
/* Rounding shift right by immediate (SRSHR/URSHR forms), via
   __builtin_aarch64_[su]rshr_n*.  As elsewhere, the unsigned data
   operand is reinterpreted through the signed vector type to match
   the builtins' signed-mode declarations.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
}

/* Scalar (DI-mode) forms.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshrd_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshrd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
}
   22822 
/* vrsra */
/* Rounding shift right by immediate and accumulate (SRSRA/URSRA
   forms): wraps __builtin_aarch64_[su]rsra_n*, which take the
   accumulator __a, the shifted operand __b and the immediate count
   __c.  Unsigned variants reinterpret both vector operands through
   the signed types to match the builtins' signed-mode signatures.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
						    (int8x8_t) __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
						     (int16x4_t) __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
						     (int32x2_t) __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
						   (int64x1_t) __b, __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
						      (int8x16_t) __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
						     (int16x8_t) __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
						     (int32x4_t) __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
						     (int64x2_t) __b, __c);
}

/* Scalar (DI-mode) forms; no casts needed for vrsrad_n_u64 because
   uint64x1_t is a plain uint64_t typedef here.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
}
   22940 
#ifdef __ARM_FEATURE_CRYPTO

/* Cryptography extension intrinsics, only available when the target
   advertises __ARM_FEATURE_CRYPTO.  Each wraps the corresponding
   __builtin_aarch64_crypto_* builtin (SHA1C/SHA1M/SHA1P round
   operations, SHA1H, SHA1SU0/SHA1SU1 schedule updates, the SHA256
   equivalents, and the PMULL polynomial multiplies).  */

/* vsha1  */

static __inline uint32x4_t
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
}
static __inline uint32x4_t
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
}
static __inline uint32x4_t
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
}

/* SHA1H: fixed rotate of the E hash value.  */
static __inline uint32_t
vsha1h_u32 (uint32_t hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
}

/* SHA-1 message-schedule update, parts 0 and 1.  */
static __inline uint32x4_t
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
}

static __inline uint32x4_t
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
}

/* SHA-256 hash-update and message-schedule intrinsics.  */

static __inline uint32x4_t
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
}

static __inline uint32x4_t
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
}

static __inline uint32x4_t
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
}

static __inline uint32x4_t
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
}

/* Polynomial (carry-less) 64x64 -> 128-bit multiply (PMULL).  */
static __inline poly128_t
vmull_p64 (poly64_t a, poly64_t b)
{
  return
    __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}

/* "_high" variant operating on the vector forms of the operands.  */
static __inline poly128_t
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
{
  return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}

#endif
   23017 
   23018 /* vshl */
   23019 
   23020 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   23021 vshl_n_s8 (int8x8_t __a, const int __b)
   23022 {
   23023   return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
   23024 }
   23025 
   23026 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   23027 vshl_n_s16 (int16x4_t __a, const int __b)
   23028 {
   23029   return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
   23030 }
   23031 
   23032 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   23033 vshl_n_s32 (int32x2_t __a, const int __b)
   23034 {
   23035   return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
   23036 }
   23037 
   23038 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23039 vshl_n_s64 (int64x1_t __a, const int __b)
   23040 {
   23041   return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
   23042 }
   23043 
   23044 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   23045 vshl_n_u8 (uint8x8_t __a, const int __b)
   23046 {
   23047   return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
   23048 }
   23049 
   23050 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   23051 vshl_n_u16 (uint16x4_t __a, const int __b)
   23052 {
   23053   return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
   23054 }
   23055 
   23056 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   23057 vshl_n_u32 (uint32x2_t __a, const int __b)
   23058 {
   23059   return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
   23060 }
   23061 
   23062 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23063 vshl_n_u64 (uint64x1_t __a, const int __b)
   23064 {
   23065   return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
   23066 }
   23067 
         /* vshlq_n_*: 128-bit (quad) vector shift left by immediate __b,
            signed and unsigned lane variants.  Unsigned forms cast through
            the signed builtins, as in the 64-bit versions above.  */
   23068 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
   23069 vshlq_n_s8 (int8x16_t __a, const int __b)
   23070 {
   23071   return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
   23072 }
   23073 
   23074 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   23075 vshlq_n_s16 (int16x8_t __a, const int __b)
   23076 {
   23077   return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
   23078 }
   23079 
   23080 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   23081 vshlq_n_s32 (int32x4_t __a, const int __b)
   23082 {
   23083   return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
   23084 }
   23085 
   23086 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   23087 vshlq_n_s64 (int64x2_t __a, const int __b)
   23088 {
   23089   return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
   23090 }
   23091 
   23092 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   23093 vshlq_n_u8 (uint8x16_t __a, const int __b)
   23094 {
   23095   return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
   23096 }
   23097 
   23098 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   23099 vshlq_n_u16 (uint16x8_t __a, const int __b)
   23100 {
   23101   return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
   23102 }
   23103 
   23104 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   23105 vshlq_n_u32 (uint32x4_t __a, const int __b)
   23106 {
   23107   return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
   23108 }
   23109 
   23110 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   23111 vshlq_n_u64 (uint64x2_t __a, const int __b)
   23112 {
   23113   return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
   23114 }
   23115 
         /* vshld_n_{s64,u64}: scalar 64-bit shift left by immediate.  Both
            map to the same signed builtin; the unsigned form relies on the
            implicit uint64->int64 conversion (int64x1_t is a scalar
            typedef) rather than an explicit cast as vshl_n_u64 uses.  */
   23116 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23117 vshld_n_s64 (int64x1_t __a, const int __b)
   23118 {
   23119   return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
   23120 }
   23121 
   23122 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23123 vshld_n_u64 (uint64x1_t __a, const int __b)
   23124 {
   23125   return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
   23126 }
   23127 
         /* vshl_*: vector shift left with a per-lane signed shift count in
            __b (sshl/ushl builtins).  NOTE(review): per AArch64 SSHL/USHL
            a negative lane count presumably shifts right — behavior comes
            from the builtin, not visible here; confirm against the ISA.  */
   23128 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   23129 vshl_s8 (int8x8_t __a, int8x8_t __b)
   23130 {
   23131   return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
   23132 }
   23133 
   23134 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   23135 vshl_s16 (int16x4_t __a, int16x4_t __b)
   23136 {
   23137   return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
   23138 }
   23139 
   23140 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   23141 vshl_s32 (int32x2_t __a, int32x2_t __b)
   23142 {
   23143   return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
   23144 }
   23145 
   23146 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23147 vshl_s64 (int64x1_t __a, int64x1_t __b)
   23148 {
   23149   return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
   23150 }
   23151 
   23152 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   23153 vshl_u8 (uint8x8_t __a, int8x8_t __b)
   23154 {
   23155   return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
   23156 }
   23157 
   23158 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   23159 vshl_u16 (uint16x4_t __a, int16x4_t __b)
   23160 {
   23161   return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
   23162 }
   23163 
   23164 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   23165 vshl_u32 (uint32x2_t __a, int32x2_t __b)
   23166 {
   23167   return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
   23168 }
   23169 
   23170 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23171 vshl_u64 (uint64x1_t __a, int64x1_t __b)
   23172 {
   23173   return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
   23174 }
   23175 
         /* vshlq_*: 128-bit variant of the register-controlled shift left;
            per-lane signed count in __b, signed (sshl) and unsigned (ushl)
            builtin mappings.  */
   23176 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
   23177 vshlq_s8 (int8x16_t __a, int8x16_t __b)
   23178 {
   23179   return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
   23180 }
   23181 
   23182 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   23183 vshlq_s16 (int16x8_t __a, int16x8_t __b)
   23184 {
   23185   return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
   23186 }
   23187 
   23188 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   23189 vshlq_s32 (int32x4_t __a, int32x4_t __b)
   23190 {
   23191   return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
   23192 }
   23193 
   23194 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   23195 vshlq_s64 (int64x2_t __a, int64x2_t __b)
   23196 {
   23197   return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
   23198 }
   23199 
   23200 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   23201 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
   23202 {
   23203   return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
   23204 }
   23205 
   23206 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   23207 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
   23208 {
   23209   return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
   23210 }
   23211 
   23212 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   23213 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
   23214 {
   23215   return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
   23216 }
   23217 
   23218 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   23219 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
   23220 {
   23221   return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
   23222 }
   23223 
         /* vshld_{s64,u64}: scalar 64-bit register-controlled shift left.
            Unlike the immediate forms above, the unsigned variant maps to
            the dedicated unsigned builtin (ushldi).  */
   23224 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23225 vshld_s64 (int64x1_t __a, int64x1_t __b)
   23226 {
   23227   return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
   23228 }
   23229 
   23230 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23231 vshld_u64 (uint64x1_t __a, uint64x1_t __b)
   23232 {
   23233   return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
   23234 }
   23235 
         /* vshll_high_n_*: widening shift left of the HIGH half of a
            128-bit source — each selected lane is widened to twice its
            width and shifted by immediate __b (sshll2/ushll2 builtins).  */
   23236 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   23237 vshll_high_n_s8 (int8x16_t __a, const int __b)
   23238 {
   23239   return __builtin_aarch64_sshll2_nv16qi (__a, __b);
   23240 }
   23241 
   23242 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   23243 vshll_high_n_s16 (int16x8_t __a, const int __b)
   23244 {
   23245   return __builtin_aarch64_sshll2_nv8hi (__a, __b);
   23246 }
   23247 
   23248 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   23249 vshll_high_n_s32 (int32x4_t __a, const int __b)
   23250 {
   23251   return __builtin_aarch64_sshll2_nv4si (__a, __b);
   23252 }
   23253 
   23254 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   23255 vshll_high_n_u8 (uint8x16_t __a, const int __b)
   23256 {
   23257   return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
   23258 }
   23259 
   23260 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   23261 vshll_high_n_u16 (uint16x8_t __a, const int __b)
   23262 {
   23263   return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
   23264 }
   23265 
   23266 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   23267 vshll_high_n_u32 (uint32x4_t __a, const int __b)
   23268 {
   23269   return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
   23270 }
   23271 
         /* vshll_n_*: widening shift left of a 64-bit vector — every lane
            is widened to twice its width, then shifted by immediate __b
            (sshll/ushll builtins).  */
   23272 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   23273 vshll_n_s8 (int8x8_t __a, const int __b)
   23274 {
   23275   return __builtin_aarch64_sshll_nv8qi (__a, __b);
   23276 }
   23277 
   23278 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   23279 vshll_n_s16 (int16x4_t __a, const int __b)
   23280 {
   23281   return __builtin_aarch64_sshll_nv4hi (__a, __b);
   23282 }
   23283 
   23284 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   23285 vshll_n_s32 (int32x2_t __a, const int __b)
   23286 {
   23287   return __builtin_aarch64_sshll_nv2si (__a, __b);
   23288 }
   23289 
   23290 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   23291 vshll_n_u8 (uint8x8_t __a, const int __b)
   23292 {
   23293   return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
   23294 }
   23295 
   23296 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   23297 vshll_n_u16 (uint16x4_t __a, const int __b)
   23298 {
   23299   return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
   23300 }
   23301 
   23302 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   23303 vshll_n_u32 (uint32x2_t __a, const int __b)
   23304 {
   23305   return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
   23306 }
   23307 
   23308 /* vshr */
   23309 
         /* vshr_n_*: vector shift right by immediate.  Signed lanes use the
            arithmetic-shift builtins (ashr), unsigned lanes the logical
            ones (lshr).  The u64 form calls a dedicated unsigned-typed
            builtin (_uus suffix) and so needs no casts.  */
   23310 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   23311 vshr_n_s8 (int8x8_t __a, const int __b)
   23312 {
   23313   return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
   23314 }
   23315 
   23316 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   23317 vshr_n_s16 (int16x4_t __a, const int __b)
   23318 {
   23319   return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
   23320 }
   23321 
   23322 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   23323 vshr_n_s32 (int32x2_t __a, const int __b)
   23324 {
   23325   return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
   23326 }
   23327 
   23328 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23329 vshr_n_s64 (int64x1_t __a, const int __b)
   23330 {
   23331   return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
   23332 }
   23333 
   23334 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   23335 vshr_n_u8 (uint8x8_t __a, const int __b)
   23336 {
   23337   return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
   23338 }
   23339 
   23340 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   23341 vshr_n_u16 (uint16x4_t __a, const int __b)
   23342 {
   23343   return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
   23344 }
   23345 
   23346 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   23347 vshr_n_u32 (uint32x2_t __a, const int __b)
   23348 {
   23349   return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
   23350 }
   23351 
   23352 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23353 vshr_n_u64 (uint64x1_t __a, const int __b)
   23354 {
   23355   return __builtin_aarch64_lshr_simddi_uus ( __a, __b);
   23356 }
   23357 
         /* vshrq_n_*: 128-bit vector shift right by immediate; arithmetic
            (ashr) for signed lanes, logical (lshr) for unsigned.  */
   23358 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
   23359 vshrq_n_s8 (int8x16_t __a, const int __b)
   23360 {
   23361   return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
   23362 }
   23363 
   23364 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   23365 vshrq_n_s16 (int16x8_t __a, const int __b)
   23366 {
   23367   return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
   23368 }
   23369 
   23370 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   23371 vshrq_n_s32 (int32x4_t __a, const int __b)
   23372 {
   23373   return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
   23374 }
   23375 
   23376 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   23377 vshrq_n_s64 (int64x2_t __a, const int __b)
   23378 {
   23379   return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
   23380 }
   23381 
   23382 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   23383 vshrq_n_u8 (uint8x16_t __a, const int __b)
   23384 {
   23385   return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
   23386 }
   23387 
   23388 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   23389 vshrq_n_u16 (uint16x8_t __a, const int __b)
   23390 {
   23391   return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
   23392 }
   23393 
   23394 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   23395 vshrq_n_u32 (uint32x4_t __a, const int __b)
   23396 {
   23397   return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
   23398 }
   23399 
   23400 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   23401 vshrq_n_u64 (uint64x2_t __a, const int __b)
   23402 {
   23403   return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
   23404 }
   23405 
         /* vshrd_n_{s64,u64}: scalar 64-bit shift right by immediate.
            Note the asymmetry in this header version: the signed form
            takes int64x1_t while the unsigned form takes plain uint64_t.  */
   23406 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23407 vshrd_n_s64 (int64x1_t __a, const int __b)
   23408 {
   23409   return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
   23410 }
   23411 
   23412 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
   23413 vshrd_n_u64 (uint64_t __a, const int __b)
   23414 {
   23415   return __builtin_aarch64_lshr_simddi_uus (__a, __b);
   23416 }
   23417 
   23418 /* vsli */
   23419 
         /* vsli_n_*: shift-left-and-insert — __b is shifted left by __c and
            inserted into __a (ssli/usli builtins); result keeps __a's bits
            below the shift amount.  Behavior is defined by the builtin.  */
   23420 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   23421 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
   23422 {
   23423   return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
   23424 }
   23425 
   23426 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   23427 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
   23428 {
   23429   return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
   23430 }
   23431 
   23432 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   23433 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
   23434 {
   23435   return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
   23436 }
   23437 
   23438 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23439 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
   23440 {
   23441   return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
   23442 }
   23443 
   23444 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   23445 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
   23446 {
   23447   return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
   23448 						   (int8x8_t) __b, __c);
   23449 }
   23450 
   23451 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   23452 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
   23453 {
   23454   return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
   23455 						    (int16x4_t) __b, __c);
   23456 }
   23457 
   23458 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   23459 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
   23460 {
   23461   return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
   23462 						    (int32x2_t) __b, __c);
   23463 }
   23464 
   23465 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23466 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
   23467 {
   23468   return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
   23469 						  (int64x1_t) __b, __c);
   23470 }
   23471 
         /* vsliq_n_*: 128-bit shift-left-and-insert by immediate __c.  */
   23472 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
   23473 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
   23474 {
   23475   return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
   23476 }
   23477 
   23478 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   23479 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
   23480 {
   23481   return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
   23482 }
   23483 
   23484 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   23485 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
   23486 {
   23487   return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
   23488 }
   23489 
   23490 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   23491 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
   23492 {
   23493   return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
   23494 }
   23495 
   23496 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   23497 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
   23498 {
   23499   return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
   23500 						     (int8x16_t) __b, __c);
   23501 }
   23502 
   23503 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   23504 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
   23505 {
   23506   return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
   23507 						    (int16x8_t) __b, __c);
   23508 }
   23509 
   23510 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   23511 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
   23512 {
   23513   return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
   23514 						    (int32x4_t) __b, __c);
   23515 }
   23516 
   23517 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   23518 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
   23519 {
   23520   return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
   23521 						    (int64x2_t) __b, __c);
   23522 }
   23523 
         /* vslid_n_{s64,u64}: scalar 64-bit shift-left-and-insert.  The
            unsigned form passes __a/__b uncast (scalar typedefs convert
            implicitly), unlike vsli_n_u64 above.  */
   23524 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23525 vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
   23526 {
   23527   return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
   23528 }
   23529 
   23530 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23531 vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
   23532 {
   23533   return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
   23534 }
   23535 
   23536 /* vsqadd */
   23537 
         /* vsqadd_*: unsigned saturating add of a SIGNED addend __b to an
            unsigned accumulator __a (usqadd builtins), 64-bit vectors.  */
   23538 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   23539 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
   23540 {
   23541   return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
   23542 						   (int8x8_t) __b);
   23543 }
   23544 
   23545 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   23546 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
   23547 {
   23548   return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
   23549 						    (int16x4_t) __b);
   23550 }
   23551 
   23552 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   23553 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
   23554 {
   23555   return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
   23556 						    (int32x2_t) __b);
   23557 }
   23558 
   23559 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23560 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
   23561 {
   23562   return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
   23563 }
   23564 
         /* vsqaddq_*: 128-bit unsigned saturating add of a signed addend.  */
   23565 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   23566 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
   23567 {
   23568   return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
   23569 						     (int8x16_t) __b);
   23570 }
   23571 
   23572 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   23573 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
   23574 {
   23575   return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
   23576 						    (int16x8_t) __b);
   23577 }
   23578 
   23579 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   23580 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
   23581 {
   23582   return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
   23583 						    (int32x4_t) __b);
   23584 }
   23585 
   23586 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   23587 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
   23588 {
   23589   return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
   23590 						    (int64x2_t) __b);
   23591 }
   23592 
         /* vsqaddb/h/s/d: scalar unsigned saturating add of a signed
            addend.  Note: in this header version vsqaddd_u64 takes the
            x1 vector typedefs rather than plain uint64_t/int64_t (both
            are scalar typedefs here, so the ABI is the same).  */
   23593 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
   23594 vsqaddb_u8 (uint8_t __a, int8_t __b)
   23595 {
   23596   return (uint8_t) __builtin_aarch64_usqaddqi ((int8_t) __a, __b);
   23597 }
   23598 
   23599 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
   23600 vsqaddh_u16 (uint16_t __a, int16_t __b)
   23601 {
   23602   return (uint16_t) __builtin_aarch64_usqaddhi ((int16_t) __a, __b);
   23603 }
   23604 
   23605 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
   23606 vsqadds_u32 (uint32_t __a, int32_t __b)
   23607 {
   23608   return (uint32_t) __builtin_aarch64_usqaddsi ((int32_t) __a, __b);
   23609 }
   23610 
   23611 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23612 vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
   23613 {
   23614   return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
   23615 }
   23616 
   23617 /* vsqrt */
         /* vsqrt_f32 / vsqrtq_f32 / vsqrtq_f64: element-wise floating-point
            square root via the sqrt builtins.  (These use unprefixed
            parameter names, unlike the __a/__b convention elsewhere.)  */
   23618 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
   23619 vsqrt_f32 (float32x2_t a)
   23620 {
   23621   return __builtin_aarch64_sqrtv2sf (a);
   23622 }
   23623 
   23624 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
   23625 vsqrtq_f32 (float32x4_t a)
   23626 {
   23627   return __builtin_aarch64_sqrtv4sf (a);
   23628 }
   23629 
   23630 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
   23631 vsqrtq_f64 (float64x2_t a)
   23632 {
   23633   return __builtin_aarch64_sqrtv2df (a);
   23634 }
   23635 
   23636 /* vsra */
   23637 
         /* vsra_n_*: shift __b right by immediate __c and accumulate into
            __a (ssra/usra builtins).  */
   23638 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   23639 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
   23640 {
   23641   return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
   23642 }
   23643 
   23644 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   23645 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
   23646 {
   23647   return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
   23648 }
   23649 
   23650 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   23651 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
   23652 {
   23653   return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
   23654 }
   23655 
   23656 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23657 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
   23658 {
   23659   return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
   23660 }
   23661 
   23662 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   23663 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
   23664 {
   23665   return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
   23666 						   (int8x8_t) __b, __c);
   23667 }
   23668 
   23669 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   23670 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
   23671 {
   23672   return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
   23673 						    (int16x4_t) __b, __c);
   23674 }
   23675 
   23676 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   23677 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
   23678 {
   23679   return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
   23680 						    (int32x2_t) __b, __c);
   23681 }
   23682 
   23683 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23684 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
   23685 {
   23686   return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
   23687 						  (int64x1_t) __b, __c);
   23688 }
   23689 
         /* vsraq_n_*: 128-bit shift-right-and-accumulate by immediate.  */
   23690 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
   23691 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
   23692 {
   23693   return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
   23694 }
   23695 
   23696 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   23697 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
   23698 {
   23699   return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
   23700 }
   23701 
   23702 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   23703 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
   23704 {
   23705   return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
   23706 }
   23707 
   23708 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   23709 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
   23710 {
   23711   return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
   23712 }
   23713 
   23714 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   23715 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
   23716 {
   23717   return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
   23718 						     (int8x16_t) __b, __c);
   23719 }
   23720 
   23721 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   23722 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
   23723 {
   23724   return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
   23725 						    (int16x8_t) __b, __c);
   23726 }
   23727 
   23728 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   23729 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
   23730 {
   23731   return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
   23732 						    (int32x4_t) __b, __c);
   23733 }
   23734 
   23735 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   23736 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
   23737 {
   23738   return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
   23739 						    (int64x2_t) __b, __c);
   23740 }
   23741 
         /* vsrad_n_{s64,u64}: scalar 64-bit shift-right-and-accumulate.
            The unsigned form passes its arguments uncast (scalar typedefs
            convert implicitly), unlike vsra_n_u64 above.  */
   23742 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23743 vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
   23744 {
   23745   return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
   23746 }
   23747 
   23748 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23749 vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
   23750 {
   23751   return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
   23752 }
   23753 
   23754 /* vsri */
   23755 
         /* vsri_n_*: shift-right-and-insert — __b shifted right by __c is
            inserted into __a (ssri/usri builtins).  */
   23756 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   23757 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
   23758 {
   23759   return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
   23760 }
   23761 
   23762 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
   23763 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
   23764 {
   23765   return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
   23766 }
   23767 
   23768 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
   23769 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
   23770 {
   23771   return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
   23772 }
   23773 
   23774 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23775 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
   23776 {
   23777   return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
   23778 }
   23779 
   23780 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   23781 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
   23782 {
   23783   return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
   23784 						   (int8x8_t) __b, __c);
   23785 }
   23786 
   23787 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   23788 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
   23789 {
   23790   return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
   23791 						    (int16x4_t) __b, __c);
   23792 }
   23793 
   23794 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   23795 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
   23796 {
   23797   return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
   23798 						    (int32x2_t) __b, __c);
   23799 }
   23800 
   23801 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23802 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
   23803 {
   23804   return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
   23805 						  (int64x1_t) __b, __c);
   23806 }
   23807 
         /* vsriq_n_*: 128-bit shift-right-and-insert by immediate.  */
   23808 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
   23809 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
   23810 {
   23811   return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
   23812 }
   23813 
   23814 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
   23815 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
   23816 {
   23817   return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
   23818 }
   23819 
   23820 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
   23821 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
   23822 {
   23823   return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
   23824 }
   23825 
   23826 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
   23827 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
   23828 {
   23829   return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
   23830 }
   23831 
   23832 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
   23833 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
   23834 {
   23835   return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
   23836 						     (int8x16_t) __b, __c);
   23837 }
   23838 
   23839 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
   23840 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
   23841 {
   23842   return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
   23843 						    (int16x8_t) __b, __c);
   23844 }
   23845 
   23846 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
   23847 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
   23848 {
   23849   return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
   23850 						    (int32x4_t) __b, __c);
   23851 }
   23852 
   23853 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
   23854 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
   23855 {
   23856   return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
   23857 						    (int64x2_t) __b, __c);
   23858 }
   23859 
         /* vsrid_n_{s64,u64}: scalar 64-bit shift-right-and-insert; the
            unsigned form relies on implicit scalar conversion, matching
            vslid_n_u64/vsrad_n_u64 above.  */
   23860 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   23861 vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
   23862 {
   23863   return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
   23864 }
   23865 
   23866 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   23867 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
   23868 {
   23869   return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
   23870 }
   23871 
   23872 /* vst1 */
   23873 
/* vst1 (64-bit forms): store one vector of elements to *a via the
   st1 builtins.  Pointer casts adapt the public element pointer type
   to the builtin's expected pointer type.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32 (float32_t *a, float32x2_t b)
{
  __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
}

/* float64x1_t is a plain double in this header, so a direct scalar
   store is all that is needed.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f64 (float64_t *a, float64x1_t b)
{
  *a = b;
}

/* Poly and unsigned variants cast the vector to the signed builtin
   vector type of the same width; the cast only reinterprets bits.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_p8 (poly8_t *a, poly8x8_t b)
{
  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
			     (int8x8_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_p16 (poly16_t *a, poly16x4_t b)
{
  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
			     (int16x4_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s8 (int8_t *a, int8x8_t b)
{
  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s16 (int16_t *a, int16x4_t b)
{
  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s32 (int32_t *a, int32x2_t b)
{
  __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
}

/* int64x1_t is a plain int64_t in this header.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s64 (int64_t *a, int64x1_t b)
{
  *a = b;
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u8 (uint8_t *a, uint8x8_t b)
{
  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
			     (int8x8_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u16 (uint16_t *a, uint16x4_t b)
{
  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
			     (int16x4_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u32 (uint32_t *a, uint32x2_t b)
{
  __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
			     (int32x2_t) b);
}

/* uint64x1_t is a plain uint64_t in this header.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u64 (uint64_t *a, uint64x1_t b)
{
  *a = b;
}
   23950 
/* vst1q (128-bit forms): store one q-register vector of floats to *a.
   Note these two precede the "/ * vst1q * /" section comment below.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t *a, float32x4_t b)
{
  __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f64 (float64_t *a, float64x2_t b)
{
  __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
}
   23962 
/* vst1q (integer/poly forms; vst1q_f32 and vst1q_f64 are above) */
   23964 
/* vst1q (128-bit forms): store one q-register vector of elements to
   *a.  Poly and unsigned variants cast to the signed builtin vector
   type of the same width; the cast only reinterprets bits.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_p8 (poly8_t *a, poly8x16_t b)
{
  __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
			      (int8x16_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_p16 (poly16_t *a, poly16x8_t b)
{
  __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
			     (int16x8_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s8 (int8_t *a, int8x16_t b)
{
  __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s16 (int16_t *a, int16x8_t b)
{
  __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s32 (int32_t *a, int32x4_t b)
{
  __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s64 (int64_t *a, int64x2_t b)
{
  __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u8 (uint8_t *a, uint8x16_t b)
{
  __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
			      (int8x16_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u16 (uint16_t *a, uint16x8_t b)
{
  __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
			     (int16x8_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u32 (uint32_t *a, uint32x4_t b)
{
  __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
			     (int32x4_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u64 (uint64_t *a, uint64x2_t b)
{
  __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
			     (int64x2_t) b);
}
   24030 
   24031 /* vstn */
   24032 
   24033 __extension__ static __inline void
   24034 vst2_s64 (int64_t * __a, int64x1x2_t val)
   24035 {
   24036   __builtin_aarch64_simd_oi __o;
   24037   int64x2x2_t temp;
   24038   temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
   24039   temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
   24040   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
   24041   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
   24042   __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
   24043 }
   24044 
   24045 __extension__ static __inline void
   24046 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
   24047 {
   24048   __builtin_aarch64_simd_oi __o;
   24049   uint64x2x2_t temp;
   24050   temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
   24051   temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
   24052   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
   24053   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
   24054   __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
   24055 }
   24056 
   24057 __extension__ static __inline void
   24058 vst2_f64 (float64_t * __a, float64x1x2_t val)
   24059 {
   24060   __builtin_aarch64_simd_oi __o;
   24061   float64x2x2_t temp;
   24062   temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
   24063   temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
   24064   __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
   24065   __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
   24066   __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
   24067 }
   24068 
   24069 __extension__ static __inline void
   24070 vst2_s8 (int8_t * __a, int8x8x2_t val)
   24071 {
   24072   __builtin_aarch64_simd_oi __o;
   24073   int8x16x2_t temp;
   24074   temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
   24075   temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
   24076   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
   24077   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
   24078   __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
   24079 }
   24080 
/* vst2 (64-bit element-vector forms): interleave val.val[0..1] and
   store to __a.  Each d-register input is widened to a q register
   with a zero high half (vcombine with a zero vcreate) to fit the
   q-register-based opaque pair type __builtin_aarch64_simd_oi.  The
   v<2n> store builtin then writes the 64-bit data halves.  __o is
   deliberately built up lane by lane; its initial value is never
   read.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_p8 (poly8_t * __a, poly8x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_s16 (int16_t * __a, int16x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  int16x8x2_t temp;
  temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_p16 (poly16_t * __a, poly16x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  poly16x8x2_t temp;
  temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_s32 (int32_t * __a, int32x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  int32x4x2_t temp;
  temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
  __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u8 (uint8_t * __a, uint8x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u16 (uint16_t * __a, uint16x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  uint16x8x2_t temp;
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u32 (uint32_t * __a, uint32x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  uint32x4x2_t temp;
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
  __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32 (float32_t * __a, float32x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  float32x4x2_t temp;
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
  __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
   24176 
/* vst2q (128-bit element-vector forms): interleave val.val[0..1] and
   store to __a.  The q-register inputs are packed directly into the
   opaque pair type __builtin_aarch64_simd_oi — no widening needed.
   __o is deliberately built up lane by lane; its initial value is
   never read.  Casts to the signed builtin vector types only
   reinterpret bits.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s8 (int8_t * __a, int8x16x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
  __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
  __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s16 (int16_t * __a, int16x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
  __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
  __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s32 (int32_t * __a, int32x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
  __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s64 (int64_t * __a, int64x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
  __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
  __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
  __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
  __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
  __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32 (float32_t * __a, float32x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
  __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f64 (float64_t * __a, float64x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
  __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
   24284 
   24285 __extension__ static __inline void
   24286 vst3_s64 (int64_t * __a, int64x1x3_t val)
   24287 {
   24288   __builtin_aarch64_simd_ci __o;
   24289   int64x2x3_t temp;
   24290   temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
   24291   temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
   24292   temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
   24293   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
   24294   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
   24295   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
   24296   __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
   24297 }
   24298 
   24299 __extension__ static __inline void
   24300 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
   24301 {
   24302   __builtin_aarch64_simd_ci __o;
   24303   uint64x2x3_t temp;
   24304   temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
   24305   temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
   24306   temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
   24307   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
   24308   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
   24309   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
   24310   __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
   24311 }
   24312 
   24313 __extension__ static __inline void
   24314 vst3_f64 (float64_t * __a, float64x1x3_t val)
   24315 {
   24316   __builtin_aarch64_simd_ci __o;
   24317   float64x2x3_t temp;
   24318   temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
   24319   temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
   24320   temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
   24321   __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
   24322   __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
   24323   __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
   24324   __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
   24325 }
   24326 
   24327 __extension__ static __inline void
   24328 vst3_s8 (int8_t * __a, int8x8x3_t val)
   24329 {
   24330   __builtin_aarch64_simd_ci __o;
   24331   int8x16x3_t temp;
   24332   temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
   24333   temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
   24334   temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
   24335   __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
   24336   __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
   24337   __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
   24338   __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
   24339 }
   24340 
/* vst3 (64-bit element-vector forms): interleave val.val[0..2] and
   store to __a.  Each d-register input is widened to a q register
   with a zero high half to fit the q-register-based opaque triple
   type __builtin_aarch64_simd_ci.  __o is deliberately built up lane
   by lane; its initial value is never read.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_p8 (poly8_t * __a, poly8x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  poly8x16x3_t temp;
  temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_s16 (int16_t * __a, int16x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  int16x8x3_t temp;
  temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_p16 (poly16_t * __a, poly16x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  poly16x8x3_t temp;
  temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_s32 (int32_t * __a, int32x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  int32x4x3_t temp;
  temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
  __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_u8 (uint8_t * __a, uint8x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  uint8x16x3_t temp;
  temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_u16 (uint16_t * __a, uint16x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  uint16x8x3_t temp;
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_u32 (uint32_t * __a, uint32x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  uint32x4x3_t temp;
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
  __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  float32x4x3_t temp;
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
  __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
   24452 
/* vst3q (128-bit element-vector forms): interleave val.val[0..2] and
   store to __a.  The q-register inputs are packed directly into the
   opaque triple type __builtin_aarch64_simd_ci — no widening needed.
   __o is deliberately built up lane by lane; its initial value is
   never read.  Casts to the signed builtin vector types only
   reinterpret bits.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s8 (int8_t * __a, int8x16x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s16 (int16_t * __a, int16x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s32 (int32_t * __a, int32x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
  __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s64 (int64_t * __a, int64x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
   24532 
   24533 __extension__ static __inline void __attribute__ ((__always_inline__))
   24534 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
   24535 {
   24536   __builtin_aarch64_simd_ci __o;
   24537   __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
   24538   __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
   24539   __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
   24540   __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
   24541 }
   24542 
   24543 __extension__ static __inline void __attribute__ ((__always_inline__))
   24544 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
   24545 {
   24546   __builtin_aarch64_simd_ci __o;
   24547   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
   24548   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
   24549   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
   24550   __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
   24551 }
   24552 
   24553 __extension__ static __inline void __attribute__ ((__always_inline__))
   24554 vst3q_f32 (float32_t * __a, float32x4x3_t val)
   24555 {
   24556   __builtin_aarch64_simd_ci __o;
   24557   __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
   24558   __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
   24559   __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
   24560   __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
   24561 }
   24562 
   24563 __extension__ static __inline void __attribute__ ((__always_inline__))
   24564 vst3q_f64 (float64_t * __a, float64x2x3_t val)
   24565 {
   24566   __builtin_aarch64_simd_ci __o;
   24567   __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
   24568   __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
   24569   __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
   24570   __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
   24571 }
   24572 
   24573 __extension__ static __inline void
   24574 vst4_s64 (int64_t * __a, int64x1x4_t val)
   24575 {
   24576   __builtin_aarch64_simd_xi __o;
   24577   int64x2x4_t temp;
   24578   temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
   24579   temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
   24580   temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
   24581   temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
   24582   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
   24583   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
   24584   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
   24585   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
   24586   __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
   24587 }
   24588 
   24589 __extension__ static __inline void
   24590 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
   24591 {
   24592   __builtin_aarch64_simd_xi __o;
   24593   uint64x2x4_t temp;
   24594   temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
   24595   temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
   24596   temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
   24597   temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
   24598   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
   24599   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
   24600   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
   24601   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
   24602   __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
   24603 }
   24604 
   24605 __extension__ static __inline void
   24606 vst4_f64 (float64_t * __a, float64x1x4_t val)
   24607 {
   24608   __builtin_aarch64_simd_xi __o;
   24609   float64x2x4_t temp;
   24610   temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
   24611   temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
   24612   temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
   24613   temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
   24614   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
   24615   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
   24616   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
   24617   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
   24618   __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
   24619 }
   24620 
   24621 __extension__ static __inline void
   24622 vst4_s8 (int8_t * __a, int8x8x4_t val)
   24623 {
   24624   __builtin_aarch64_simd_xi __o;
   24625   int8x16x4_t temp;
   24626   temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
   24627   temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
   24628   temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
   24629   temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
   24630   __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
   24631   __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
   24632   __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
   24633   __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
   24634   __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
   24635 }
   24636 
/* vst4 D-register family: store four 64-bit vectors with 4-way
   de-interleaving (ST4).  Each variant widens its operands to Q
   registers (upper halves zeroed via vcombine) because the XI
   tuple builtins take full Q-register lanes; the narrow st4
   builtin stores only the low halves.  __o is deliberately passed
   uninitialized to the first set_qregxi builtin, which fills the
   tuple lane by lane.  */

/* Store four vectors of 8 poly8 lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_p8 (poly8_t * __a, poly8x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  poly8x16x4_t temp;
  temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

/* Store four vectors of 4 signed 16-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_s16 (int16_t * __a, int16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  int16x8x4_t temp;
  temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

/* Store four vectors of 4 poly16 lanes (V8HI builtins via casts).  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_p16 (poly16_t * __a, poly16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  poly16x8x4_t temp;
  temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

/* Store four vectors of 2 signed 32-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_s32 (int32_t * __a, int32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  int32x4x4_t temp;
  temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

/* Store four vectors of 8 unsigned 8-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u8 (uint8_t * __a, uint8x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint8x16x4_t temp;
  temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

/* Store four vectors of 4 unsigned 16-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u16 (uint16_t * __a, uint16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint16x8x4_t temp;
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

/* Store four vectors of 2 unsigned 32-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u32 (uint32_t * __a, uint32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint32x4x4_t temp;
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

/* Store four vectors of 2 single-precision float lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  float32x4x4_t temp;
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
   24764 
/* vst4q family: store four Q-register vectors with 4-way
   de-interleaving (ST4).  The four vectors are packed into an
   opaque XI-mode register tuple lane by lane (starting from an
   uninitialized __o, which the set_qregxi builtins fill), then the
   ST4 builtin performs the interleaved store.  */

/* Store four vectors of 16 signed 8-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s8 (int8_t * __a, int8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

/* Store four vectors of 16 poly8 lanes (V16QI builtins via casts).  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

/* Store four vectors of 8 signed 16-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s16 (int16_t * __a, int16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

/* Store four vectors of 8 poly16 lanes (V8HI builtins via casts).  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

/* Store four vectors of 4 signed 32-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s32 (int32_t * __a, int32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

/* Store four vectors of 2 signed 64-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s64 (int64_t * __a, int64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

/* Store four vectors of 16 unsigned 8-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

/* Store four vectors of 8 unsigned 16-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

/* Store four vectors of 4 unsigned 32-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

/* Store four vectors of 2 unsigned 64-bit lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

/* Store four vectors of 4 single-precision float lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}

/* Store four vectors of 2 double-precision float lanes.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f64 (float64_t * __a, float64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
   24896 
   24897 /* vsub */
   24898 
   24899 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
   24900 vsubd_s64 (int64x1_t __a, int64x1_t __b)
   24901 {
   24902   return __a - __b;
   24903 }
   24904 
   24905 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   24906 vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
   24907 {
   24908   return __a - __b;
   24909 }
   24910 
   24911 /* vtbx1  */
   24912 
   24913 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   24914 vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
   24915 {
   24916   uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
   24917 			      vmov_n_u8 (8));
   24918   int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
   24919 
   24920   return vbsl_s8 (__mask, __tbl, __r);
   24921 }
   24922 
   24923 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   24924 vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
   24925 {
   24926   uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
   24927   uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
   24928 
   24929   return vbsl_u8 (__mask, __tbl, __r);
   24930 }
   24931 
   24932 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   24933 vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
   24934 {
   24935   uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
   24936   poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
   24937 
   24938   return vbsl_p8 (__mask, __tbl, __r);
   24939 }
   24940 
   24941 /* vtbx3  */
   24942 
   24943 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
   24944 vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
   24945 {
   24946   uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
   24947 			      vmov_n_u8 (24));
   24948   int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
   24949 
   24950   return vbsl_s8 (__mask, __tbl, __r);
   24951 }
   24952 
   24953 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   24954 vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
   24955 {
   24956   uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
   24957   uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
   24958 
   24959   return vbsl_u8 (__mask, __tbl, __r);
   24960 }
   24961 
   24962 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
   24963 vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
   24964 {
   24965   uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
   24966   poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
   24967 
   24968   return vbsl_p8 (__mask, __tbl, __r);
   24969 }
   24970 
   24971 /* vtrn */
   24972 
   24973 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
   24974 vtrn_f32 (float32x2_t a, float32x2_t b)
   24975 {
   24976   return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
   24977 }
   24978 
   24979 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
   24980 vtrn_p8 (poly8x8_t a, poly8x8_t b)
   24981 {
   24982   return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
   24983 }
   24984 
   24985 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
   24986 vtrn_p16 (poly16x4_t a, poly16x4_t b)
   24987 {
   24988   return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
   24989 }
   24990 
   24991 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
   24992 vtrn_s8 (int8x8_t a, int8x8_t b)
   24993 {
   24994   return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
   24995 }
   24996 
   24997 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
   24998 vtrn_s16 (int16x4_t a, int16x4_t b)
   24999 {
   25000   return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
   25001 }
   25002 
   25003 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
   25004 vtrn_s32 (int32x2_t a, int32x2_t b)
   25005 {
   25006   return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
   25007 }
   25008 
   25009 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
   25010 vtrn_u8 (uint8x8_t a, uint8x8_t b)
   25011 {
   25012   return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
   25013 }
   25014 
   25015 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
   25016 vtrn_u16 (uint16x4_t a, uint16x4_t b)
   25017 {
   25018   return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
   25019 }
   25020 
   25021 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
   25022 vtrn_u32 (uint32x2_t a, uint32x2_t b)
   25023 {
   25024   return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
   25025 }
   25026 
   25027 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
   25028 vtrnq_f32 (float32x4_t a, float32x4_t b)
   25029 {
   25030   return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
   25031 }
   25032 
   25033 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
   25034 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
   25035 {
   25036   return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
   25037 }
   25038 
   25039 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
   25040 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
   25041 {
   25042   return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
   25043 }
   25044 
   25045 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
   25046 vtrnq_s8 (int8x16_t a, int8x16_t b)
   25047 {
   25048   return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
   25049 }
   25050 
   25051 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
   25052 vtrnq_s16 (int16x8_t a, int16x8_t b)
   25053 {
   25054   return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
   25055 }
   25056 
   25057 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
   25058 vtrnq_s32 (int32x4_t a, int32x4_t b)
   25059 {
   25060   return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
   25061 }
   25062 
   25063 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
   25064 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
   25065 {
   25066   return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
   25067 }
   25068 
   25069 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
   25070 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
   25071 {
   25072   return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
   25073 }
   25074 
   25075 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
   25076 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
   25077 {
   25078   return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
   25079 }
   25080 
   25081 /* vtst */
   25082 
   25083 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   25084 vtst_s8 (int8x8_t __a, int8x8_t __b)
   25085 {
   25086   return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
   25087 }
   25088 
   25089 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   25090 vtst_s16 (int16x4_t __a, int16x4_t __b)
   25091 {
   25092   return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
   25093 }
   25094 
   25095 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   25096 vtst_s32 (int32x2_t __a, int32x2_t __b)
   25097 {
   25098   return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
   25099 }
   25100 
   25101 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   25102 vtst_s64 (int64x1_t __a, int64x1_t __b)
   25103 {
   25104   return (__a & __b) ? -1ll : 0ll;
   25105 }
   25106 
   25107 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
   25108 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
   25109 {
   25110   return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
   25111 						 (int8x8_t) __b);
   25112 }
   25113 
   25114 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
   25115 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
   25116 {
   25117   return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
   25118 						  (int16x4_t) __b);
   25119 }
   25120 
   25121 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
   25122 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
   25123 {
   25124   return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
   25125 						  (int32x2_t) __b);
   25126 }
   25127 
   25128 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   25129 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
   25130 {
   25131   return (__a & __b) ? -1ll : 0ll;
   25132 }
   25133 
/* 128-bit (quad) per-lane bit test: all ones where (__a & __b) != 0.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
}
   25139 
/* 128-bit per-lane bit test, 16-bit lanes.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
}
   25145 
/* 128-bit per-lane bit test, 32-bit lanes.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
}
   25151 
/* 128-bit per-lane bit test, 64-bit lanes.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
}
   25157 
/* 128-bit per-lane bit test on unsigned lanes; casts reinterpret only,
   since the builtin is declared on signed vector types.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}
   25164 
/* 128-bit per-lane bit test on unsigned 16-bit lanes.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}
   25171 
/* 128-bit per-lane bit test on unsigned 32-bit lanes.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}
   25178 
/* 128-bit per-lane bit test on unsigned 64-bit lanes.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}
   25185 
   25186 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   25187 vtstd_s64 (int64x1_t __a, int64x1_t __b)
   25188 {
   25189   return (__a & __b) ? -1ll : 0ll;
   25190 }
   25191 
   25192 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
   25193 vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
   25194 {
   25195   return (__a & __b) ? -1ll : 0ll;
   25196 }
   25197 
   25198 /* vuqadd */
   25199 
/* Saturating add of an unsigned operand to a signed one (suqadd
   builtin); the cast only reinterprets __b's lanes for the builtin's
   signed signature.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
}
   25205 
/* Saturating unsigned-into-signed add, 16-bit lanes.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
}
   25211 
/* Saturating unsigned-into-signed add, 32-bit lanes.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
}
   25217 
/* Saturating unsigned-into-signed add, scalar 64-bit (DI-mode builtin).  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
}
   25223 
/* 128-bit saturating unsigned-into-signed add, 8-bit lanes.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
}
   25229 
/* 128-bit saturating unsigned-into-signed add, 16-bit lanes.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
}
   25235 
/* 128-bit saturating unsigned-into-signed add, 32-bit lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
}
   25241 
/* 128-bit saturating unsigned-into-signed add, 64-bit lanes.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
}
   25247 
/* Scalar ("b" register) saturating unsigned-into-signed add, 8 bits.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vuqaddb_s8 (int8_t __a, uint8_t __b)
{
  return (int8_t) __builtin_aarch64_suqaddqi (__a, (int8_t) __b);
}
   25253 
/* Scalar ("h" register) saturating unsigned-into-signed add, 16 bits.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vuqaddh_s16 (int16_t __a, uint16_t __b)
{
  return (int16_t) __builtin_aarch64_suqaddhi (__a, (int16_t) __b);
}
   25259 
/* Scalar ("s" register) saturating unsigned-into-signed add, 32 bits.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vuqadds_s32 (int32_t __a, uint32_t __b)
{
  return (int32_t) __builtin_aarch64_suqaddsi (__a, (int32_t) __b);
}
   25265 
/* Scalar ("d" register) saturating unsigned-into-signed add, 64 bits;
   same builtin as vuqadd_s64 since int64x1_t is a scalar typedef.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
}
   25271 
/* Expand to one two-register interleave intrinsic v<op>[q]_<funcsuffix>
   whose result pairs the corresponding v<op>1 (first-half) and v<op>2
   (second-half) intrinsics.  Q is either empty or 'q' for the 128-bit
   variants.  */
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) 		\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)			\
  {									\
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),	\
		      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};	\
  }
   25280 
/* Instantiate __DEFINTERLEAVE for every element type and width that
   the interleave operations support, 64-bit forms first, then the
   128-bit ('q') forms.  */
#define __INTERLEAVE_LIST(op)					\
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)	\
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)		\
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)		\
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)		\
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)		\
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)		\
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)		\
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)		\
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)		\
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)	\
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)		\
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)	\
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)		\
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)		\
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)		\
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)		\
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)	\
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
   25300 
/* vuzp */

/* Generate every vuzp/vuzpq variant.  */
__INTERLEAVE_LIST (uzp)

/* vzip */

/* Generate every vzip/vzipq variant.  */
__INTERLEAVE_LIST (zip)

/* The generator macros are internal only; remove them immediately.  */
#undef __INTERLEAVE_LIST
#undef __DEFINTERLEAVE
   25311 
/* End of optimal implementations in approved order.  */

/* Tear down the file-internal lane-access and dup helper macros so
   they do not leak into user code; only the intrinsics remain.  */
#undef __aarch64_vget_lane_any
#undef __aarch64_vget_lane_f32
#undef __aarch64_vget_lane_f64
#undef __aarch64_vget_lane_p8
#undef __aarch64_vget_lane_p16
#undef __aarch64_vget_lane_s8
#undef __aarch64_vget_lane_s16
#undef __aarch64_vget_lane_s32
#undef __aarch64_vget_lane_s64
#undef __aarch64_vget_lane_u8
#undef __aarch64_vget_lane_u16
#undef __aarch64_vget_lane_u32
#undef __aarch64_vget_lane_u64

#undef __aarch64_vgetq_lane_f32
#undef __aarch64_vgetq_lane_f64
#undef __aarch64_vgetq_lane_p8
#undef __aarch64_vgetq_lane_p16
#undef __aarch64_vgetq_lane_s8
#undef __aarch64_vgetq_lane_s16
#undef __aarch64_vgetq_lane_s32
#undef __aarch64_vgetq_lane_s64
#undef __aarch64_vgetq_lane_u8
#undef __aarch64_vgetq_lane_u16
#undef __aarch64_vgetq_lane_u32
#undef __aarch64_vgetq_lane_u64

#undef __aarch64_vdup_lane_any
#undef __aarch64_vdup_lane_f32
#undef __aarch64_vdup_lane_f64
#undef __aarch64_vdup_lane_p8
#undef __aarch64_vdup_lane_p16
#undef __aarch64_vdup_lane_s8
#undef __aarch64_vdup_lane_s16
#undef __aarch64_vdup_lane_s32
#undef __aarch64_vdup_lane_s64
#undef __aarch64_vdup_lane_u8
#undef __aarch64_vdup_lane_u16
#undef __aarch64_vdup_lane_u32
#undef __aarch64_vdup_lane_u64
#undef __aarch64_vdup_laneq_f32
#undef __aarch64_vdup_laneq_f64
#undef __aarch64_vdup_laneq_p8
#undef __aarch64_vdup_laneq_p16
#undef __aarch64_vdup_laneq_s8
#undef __aarch64_vdup_laneq_s16
#undef __aarch64_vdup_laneq_s32
#undef __aarch64_vdup_laneq_s64
#undef __aarch64_vdup_laneq_u8
#undef __aarch64_vdup_laneq_u16
#undef __aarch64_vdup_laneq_u32
#undef __aarch64_vdup_laneq_u64
#undef __aarch64_vdupq_lane_f32
#undef __aarch64_vdupq_lane_f64
#undef __aarch64_vdupq_lane_p8
#undef __aarch64_vdupq_lane_p16
#undef __aarch64_vdupq_lane_s8
#undef __aarch64_vdupq_lane_s16
#undef __aarch64_vdupq_lane_s32
#undef __aarch64_vdupq_lane_s64
#undef __aarch64_vdupq_lane_u8
#undef __aarch64_vdupq_lane_u16
#undef __aarch64_vdupq_lane_u32
#undef __aarch64_vdupq_lane_u64
#undef __aarch64_vdupq_laneq_f32
#undef __aarch64_vdupq_laneq_f64
#undef __aarch64_vdupq_laneq_p8
#undef __aarch64_vdupq_laneq_p16
#undef __aarch64_vdupq_laneq_s8
#undef __aarch64_vdupq_laneq_s16
#undef __aarch64_vdupq_laneq_s32
#undef __aarch64_vdupq_laneq_s64
#undef __aarch64_vdupq_laneq_u8
#undef __aarch64_vdupq_laneq_u16
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64

#endif /* _AARCH64_NEON_H_ */
   25392