1 /* ARM NEON intrinsics include file. 2 3 Copyright (C) 2011-2014 Free Software Foundation, Inc. 4 Contributed by ARM Ltd. 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify it 9 under the terms of the GNU General Public License as published 10 by the Free Software Foundation; either version 3, or (at your 11 option) any later version. 12 13 GCC is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 16 License for more details. 17 18 Under Section 7 of GPL version 3, you are granted additional 19 permissions described in the GCC Runtime Library Exception, version 20 3.1, as published by the Free Software Foundation. 21 22 You should have received a copy of the GNU General Public License and 23 a copy of the GCC Runtime Library Exception along with this program; 24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 25 <http://www.gnu.org/licenses/>. 
*/ 26 27 #ifndef _AARCH64_NEON_H_ 28 #define _AARCH64_NEON_H_ 29 30 #include <stdint.h> 31 32 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C) 33 #define __AARCH64_INT64_C(__C) ((int64_t) __C) 34 35 typedef __builtin_aarch64_simd_qi int8x8_t 36 __attribute__ ((__vector_size__ (8))); 37 typedef __builtin_aarch64_simd_hi int16x4_t 38 __attribute__ ((__vector_size__ (8))); 39 typedef __builtin_aarch64_simd_si int32x2_t 40 __attribute__ ((__vector_size__ (8))); 41 typedef int64_t int64x1_t; 42 typedef double float64x1_t; 43 typedef __builtin_aarch64_simd_sf float32x2_t 44 __attribute__ ((__vector_size__ (8))); 45 typedef __builtin_aarch64_simd_poly8 poly8x8_t 46 __attribute__ ((__vector_size__ (8))); 47 typedef __builtin_aarch64_simd_poly16 poly16x4_t 48 __attribute__ ((__vector_size__ (8))); 49 typedef __builtin_aarch64_simd_uqi uint8x8_t 50 __attribute__ ((__vector_size__ (8))); 51 typedef __builtin_aarch64_simd_uhi uint16x4_t 52 __attribute__ ((__vector_size__ (8))); 53 typedef __builtin_aarch64_simd_usi uint32x2_t 54 __attribute__ ((__vector_size__ (8))); 55 typedef uint64_t uint64x1_t; 56 typedef __builtin_aarch64_simd_qi int8x16_t 57 __attribute__ ((__vector_size__ (16))); 58 typedef __builtin_aarch64_simd_hi int16x8_t 59 __attribute__ ((__vector_size__ (16))); 60 typedef __builtin_aarch64_simd_si int32x4_t 61 __attribute__ ((__vector_size__ (16))); 62 typedef __builtin_aarch64_simd_di int64x2_t 63 __attribute__ ((__vector_size__ (16))); 64 typedef __builtin_aarch64_simd_sf float32x4_t 65 __attribute__ ((__vector_size__ (16))); 66 typedef __builtin_aarch64_simd_df float64x2_t 67 __attribute__ ((__vector_size__ (16))); 68 typedef __builtin_aarch64_simd_poly8 poly8x16_t 69 __attribute__ ((__vector_size__ (16))); 70 typedef __builtin_aarch64_simd_poly16 poly16x8_t 71 __attribute__ ((__vector_size__ (16))); 72 typedef __builtin_aarch64_simd_poly64 poly64x2_t 73 __attribute__ ((__vector_size__ (16))); 74 typedef __builtin_aarch64_simd_uqi uint8x16_t 75 __attribute__ 
((__vector_size__ (16))); 76 typedef __builtin_aarch64_simd_uhi uint16x8_t 77 __attribute__ ((__vector_size__ (16))); 78 typedef __builtin_aarch64_simd_usi uint32x4_t 79 __attribute__ ((__vector_size__ (16))); 80 typedef __builtin_aarch64_simd_udi uint64x2_t 81 __attribute__ ((__vector_size__ (16))); 82 83 typedef float float32_t; 84 typedef double float64_t; 85 typedef __builtin_aarch64_simd_poly8 poly8_t; 86 typedef __builtin_aarch64_simd_poly16 poly16_t; 87 typedef __builtin_aarch64_simd_poly64 poly64_t; 88 typedef __builtin_aarch64_simd_poly128 poly128_t; 89 90 typedef struct int8x8x2_t 91 { 92 int8x8_t val[2]; 93 } int8x8x2_t; 94 95 typedef struct int8x16x2_t 96 { 97 int8x16_t val[2]; 98 } int8x16x2_t; 99 100 typedef struct int16x4x2_t 101 { 102 int16x4_t val[2]; 103 } int16x4x2_t; 104 105 typedef struct int16x8x2_t 106 { 107 int16x8_t val[2]; 108 } int16x8x2_t; 109 110 typedef struct int32x2x2_t 111 { 112 int32x2_t val[2]; 113 } int32x2x2_t; 114 115 typedef struct int32x4x2_t 116 { 117 int32x4_t val[2]; 118 } int32x4x2_t; 119 120 typedef struct int64x1x2_t 121 { 122 int64x1_t val[2]; 123 } int64x1x2_t; 124 125 typedef struct int64x2x2_t 126 { 127 int64x2_t val[2]; 128 } int64x2x2_t; 129 130 typedef struct uint8x8x2_t 131 { 132 uint8x8_t val[2]; 133 } uint8x8x2_t; 134 135 typedef struct uint8x16x2_t 136 { 137 uint8x16_t val[2]; 138 } uint8x16x2_t; 139 140 typedef struct uint16x4x2_t 141 { 142 uint16x4_t val[2]; 143 } uint16x4x2_t; 144 145 typedef struct uint16x8x2_t 146 { 147 uint16x8_t val[2]; 148 } uint16x8x2_t; 149 150 typedef struct uint32x2x2_t 151 { 152 uint32x2_t val[2]; 153 } uint32x2x2_t; 154 155 typedef struct uint32x4x2_t 156 { 157 uint32x4_t val[2]; 158 } uint32x4x2_t; 159 160 typedef struct uint64x1x2_t 161 { 162 uint64x1_t val[2]; 163 } uint64x1x2_t; 164 165 typedef struct uint64x2x2_t 166 { 167 uint64x2_t val[2]; 168 } uint64x2x2_t; 169 170 typedef struct float32x2x2_t 171 { 172 float32x2_t val[2]; 173 } float32x2x2_t; 174 175 typedef struct 
float32x4x2_t 176 { 177 float32x4_t val[2]; 178 } float32x4x2_t; 179 180 typedef struct float64x2x2_t 181 { 182 float64x2_t val[2]; 183 } float64x2x2_t; 184 185 typedef struct float64x1x2_t 186 { 187 float64x1_t val[2]; 188 } float64x1x2_t; 189 190 typedef struct poly8x8x2_t 191 { 192 poly8x8_t val[2]; 193 } poly8x8x2_t; 194 195 typedef struct poly8x16x2_t 196 { 197 poly8x16_t val[2]; 198 } poly8x16x2_t; 199 200 typedef struct poly16x4x2_t 201 { 202 poly16x4_t val[2]; 203 } poly16x4x2_t; 204 205 typedef struct poly16x8x2_t 206 { 207 poly16x8_t val[2]; 208 } poly16x8x2_t; 209 210 typedef struct int8x8x3_t 211 { 212 int8x8_t val[3]; 213 } int8x8x3_t; 214 215 typedef struct int8x16x3_t 216 { 217 int8x16_t val[3]; 218 } int8x16x3_t; 219 220 typedef struct int16x4x3_t 221 { 222 int16x4_t val[3]; 223 } int16x4x3_t; 224 225 typedef struct int16x8x3_t 226 { 227 int16x8_t val[3]; 228 } int16x8x3_t; 229 230 typedef struct int32x2x3_t 231 { 232 int32x2_t val[3]; 233 } int32x2x3_t; 234 235 typedef struct int32x4x3_t 236 { 237 int32x4_t val[3]; 238 } int32x4x3_t; 239 240 typedef struct int64x1x3_t 241 { 242 int64x1_t val[3]; 243 } int64x1x3_t; 244 245 typedef struct int64x2x3_t 246 { 247 int64x2_t val[3]; 248 } int64x2x3_t; 249 250 typedef struct uint8x8x3_t 251 { 252 uint8x8_t val[3]; 253 } uint8x8x3_t; 254 255 typedef struct uint8x16x3_t 256 { 257 uint8x16_t val[3]; 258 } uint8x16x3_t; 259 260 typedef struct uint16x4x3_t 261 { 262 uint16x4_t val[3]; 263 } uint16x4x3_t; 264 265 typedef struct uint16x8x3_t 266 { 267 uint16x8_t val[3]; 268 } uint16x8x3_t; 269 270 typedef struct uint32x2x3_t 271 { 272 uint32x2_t val[3]; 273 } uint32x2x3_t; 274 275 typedef struct uint32x4x3_t 276 { 277 uint32x4_t val[3]; 278 } uint32x4x3_t; 279 280 typedef struct uint64x1x3_t 281 { 282 uint64x1_t val[3]; 283 } uint64x1x3_t; 284 285 typedef struct uint64x2x3_t 286 { 287 uint64x2_t val[3]; 288 } uint64x2x3_t; 289 290 typedef struct float32x2x3_t 291 { 292 float32x2_t val[3]; 293 } float32x2x3_t; 294 
295 typedef struct float32x4x3_t 296 { 297 float32x4_t val[3]; 298 } float32x4x3_t; 299 300 typedef struct float64x2x3_t 301 { 302 float64x2_t val[3]; 303 } float64x2x3_t; 304 305 typedef struct float64x1x3_t 306 { 307 float64x1_t val[3]; 308 } float64x1x3_t; 309 310 typedef struct poly8x8x3_t 311 { 312 poly8x8_t val[3]; 313 } poly8x8x3_t; 314 315 typedef struct poly8x16x3_t 316 { 317 poly8x16_t val[3]; 318 } poly8x16x3_t; 319 320 typedef struct poly16x4x3_t 321 { 322 poly16x4_t val[3]; 323 } poly16x4x3_t; 324 325 typedef struct poly16x8x3_t 326 { 327 poly16x8_t val[3]; 328 } poly16x8x3_t; 329 330 typedef struct int8x8x4_t 331 { 332 int8x8_t val[4]; 333 } int8x8x4_t; 334 335 typedef struct int8x16x4_t 336 { 337 int8x16_t val[4]; 338 } int8x16x4_t; 339 340 typedef struct int16x4x4_t 341 { 342 int16x4_t val[4]; 343 } int16x4x4_t; 344 345 typedef struct int16x8x4_t 346 { 347 int16x8_t val[4]; 348 } int16x8x4_t; 349 350 typedef struct int32x2x4_t 351 { 352 int32x2_t val[4]; 353 } int32x2x4_t; 354 355 typedef struct int32x4x4_t 356 { 357 int32x4_t val[4]; 358 } int32x4x4_t; 359 360 typedef struct int64x1x4_t 361 { 362 int64x1_t val[4]; 363 } int64x1x4_t; 364 365 typedef struct int64x2x4_t 366 { 367 int64x2_t val[4]; 368 } int64x2x4_t; 369 370 typedef struct uint8x8x4_t 371 { 372 uint8x8_t val[4]; 373 } uint8x8x4_t; 374 375 typedef struct uint8x16x4_t 376 { 377 uint8x16_t val[4]; 378 } uint8x16x4_t; 379 380 typedef struct uint16x4x4_t 381 { 382 uint16x4_t val[4]; 383 } uint16x4x4_t; 384 385 typedef struct uint16x8x4_t 386 { 387 uint16x8_t val[4]; 388 } uint16x8x4_t; 389 390 typedef struct uint32x2x4_t 391 { 392 uint32x2_t val[4]; 393 } uint32x2x4_t; 394 395 typedef struct uint32x4x4_t 396 { 397 uint32x4_t val[4]; 398 } uint32x4x4_t; 399 400 typedef struct uint64x1x4_t 401 { 402 uint64x1_t val[4]; 403 } uint64x1x4_t; 404 405 typedef struct uint64x2x4_t 406 { 407 uint64x2_t val[4]; 408 } uint64x2x4_t; 409 410 typedef struct float32x2x4_t 411 { 412 float32x2_t val[4]; 413 } 
float32x2x4_t; 414 415 typedef struct float32x4x4_t 416 { 417 float32x4_t val[4]; 418 } float32x4x4_t; 419 420 typedef struct float64x2x4_t 421 { 422 float64x2_t val[4]; 423 } float64x2x4_t; 424 425 typedef struct float64x1x4_t 426 { 427 float64x1_t val[4]; 428 } float64x1x4_t; 429 430 typedef struct poly8x8x4_t 431 { 432 poly8x8_t val[4]; 433 } poly8x8x4_t; 434 435 typedef struct poly8x16x4_t 436 { 437 poly8x16_t val[4]; 438 } poly8x16x4_t; 439 440 typedef struct poly16x4x4_t 441 { 442 poly16x4_t val[4]; 443 } poly16x4x4_t; 444 445 typedef struct poly16x8x4_t 446 { 447 poly16x8_t val[4]; 448 } poly16x8x4_t; 449 450 /* vget_lane internal macros. */ 451 452 #define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \ 453 (__cast_ret \ 454 __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b)) 455 456 #define __aarch64_vget_lane_f32(__a, __b) \ 457 __aarch64_vget_lane_any (v2sf, , , __a, __b) 458 #define __aarch64_vget_lane_f64(__a, __b) (__a) 459 460 #define __aarch64_vget_lane_p8(__a, __b) \ 461 __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b) 462 #define __aarch64_vget_lane_p16(__a, __b) \ 463 __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b) 464 465 #define __aarch64_vget_lane_s8(__a, __b) \ 466 __aarch64_vget_lane_any (v8qi, , ,__a, __b) 467 #define __aarch64_vget_lane_s16(__a, __b) \ 468 __aarch64_vget_lane_any (v4hi, , ,__a, __b) 469 #define __aarch64_vget_lane_s32(__a, __b) \ 470 __aarch64_vget_lane_any (v2si, , ,__a, __b) 471 #define __aarch64_vget_lane_s64(__a, __b) (__a) 472 473 #define __aarch64_vget_lane_u8(__a, __b) \ 474 __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b) 475 #define __aarch64_vget_lane_u16(__a, __b) \ 476 __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b) 477 #define __aarch64_vget_lane_u32(__a, __b) \ 478 __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b) 479 #define __aarch64_vget_lane_u64(__a, __b) (__a) 480 481 #define 
__aarch64_vgetq_lane_f32(__a, __b) \ 482 __aarch64_vget_lane_any (v4sf, , , __a, __b) 483 #define __aarch64_vgetq_lane_f64(__a, __b) \ 484 __aarch64_vget_lane_any (v2df, , , __a, __b) 485 486 #define __aarch64_vgetq_lane_p8(__a, __b) \ 487 __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b) 488 #define __aarch64_vgetq_lane_p16(__a, __b) \ 489 __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b) 490 491 #define __aarch64_vgetq_lane_s8(__a, __b) \ 492 __aarch64_vget_lane_any (v16qi, , ,__a, __b) 493 #define __aarch64_vgetq_lane_s16(__a, __b) \ 494 __aarch64_vget_lane_any (v8hi, , ,__a, __b) 495 #define __aarch64_vgetq_lane_s32(__a, __b) \ 496 __aarch64_vget_lane_any (v4si, , ,__a, __b) 497 #define __aarch64_vgetq_lane_s64(__a, __b) \ 498 __aarch64_vget_lane_any (v2di, , ,__a, __b) 499 500 #define __aarch64_vgetq_lane_u8(__a, __b) \ 501 __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b) 502 #define __aarch64_vgetq_lane_u16(__a, __b) \ 503 __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b) 504 #define __aarch64_vgetq_lane_u32(__a, __b) \ 505 __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b) 506 #define __aarch64_vgetq_lane_u64(__a, __b) \ 507 __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b) 508 509 /* __aarch64_vdup_lane internal macros. 
*/ 510 #define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \ 511 vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b)) 512 513 #define __aarch64_vdup_lane_f32(__a, __b) \ 514 __aarch64_vdup_lane_any (f32, , , __a, __b) 515 #define __aarch64_vdup_lane_f64(__a, __b) (__a) 516 #define __aarch64_vdup_lane_p8(__a, __b) \ 517 __aarch64_vdup_lane_any (p8, , , __a, __b) 518 #define __aarch64_vdup_lane_p16(__a, __b) \ 519 __aarch64_vdup_lane_any (p16, , , __a, __b) 520 #define __aarch64_vdup_lane_s8(__a, __b) \ 521 __aarch64_vdup_lane_any (s8, , , __a, __b) 522 #define __aarch64_vdup_lane_s16(__a, __b) \ 523 __aarch64_vdup_lane_any (s16, , , __a, __b) 524 #define __aarch64_vdup_lane_s32(__a, __b) \ 525 __aarch64_vdup_lane_any (s32, , , __a, __b) 526 #define __aarch64_vdup_lane_s64(__a, __b) (__a) 527 #define __aarch64_vdup_lane_u8(__a, __b) \ 528 __aarch64_vdup_lane_any (u8, , , __a, __b) 529 #define __aarch64_vdup_lane_u16(__a, __b) \ 530 __aarch64_vdup_lane_any (u16, , , __a, __b) 531 #define __aarch64_vdup_lane_u32(__a, __b) \ 532 __aarch64_vdup_lane_any (u32, , , __a, __b) 533 #define __aarch64_vdup_lane_u64(__a, __b) (__a) 534 535 /* __aarch64_vdup_laneq internal macros. 
*/ 536 #define __aarch64_vdup_laneq_f32(__a, __b) \ 537 __aarch64_vdup_lane_any (f32, , q, __a, __b) 538 #define __aarch64_vdup_laneq_f64(__a, __b) \ 539 __aarch64_vdup_lane_any (f64, , q, __a, __b) 540 #define __aarch64_vdup_laneq_p8(__a, __b) \ 541 __aarch64_vdup_lane_any (p8, , q, __a, __b) 542 #define __aarch64_vdup_laneq_p16(__a, __b) \ 543 __aarch64_vdup_lane_any (p16, , q, __a, __b) 544 #define __aarch64_vdup_laneq_s8(__a, __b) \ 545 __aarch64_vdup_lane_any (s8, , q, __a, __b) 546 #define __aarch64_vdup_laneq_s16(__a, __b) \ 547 __aarch64_vdup_lane_any (s16, , q, __a, __b) 548 #define __aarch64_vdup_laneq_s32(__a, __b) \ 549 __aarch64_vdup_lane_any (s32, , q, __a, __b) 550 #define __aarch64_vdup_laneq_s64(__a, __b) \ 551 __aarch64_vdup_lane_any (s64, , q, __a, __b) 552 #define __aarch64_vdup_laneq_u8(__a, __b) \ 553 __aarch64_vdup_lane_any (u8, , q, __a, __b) 554 #define __aarch64_vdup_laneq_u16(__a, __b) \ 555 __aarch64_vdup_lane_any (u16, , q, __a, __b) 556 #define __aarch64_vdup_laneq_u32(__a, __b) \ 557 __aarch64_vdup_lane_any (u32, , q, __a, __b) 558 #define __aarch64_vdup_laneq_u64(__a, __b) \ 559 __aarch64_vdup_lane_any (u64, , q, __a, __b) 560 561 /* __aarch64_vdupq_lane internal macros. 
*/ 562 #define __aarch64_vdupq_lane_f32(__a, __b) \ 563 __aarch64_vdup_lane_any (f32, q, , __a, __b) 564 #define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a)) 565 #define __aarch64_vdupq_lane_p8(__a, __b) \ 566 __aarch64_vdup_lane_any (p8, q, , __a, __b) 567 #define __aarch64_vdupq_lane_p16(__a, __b) \ 568 __aarch64_vdup_lane_any (p16, q, , __a, __b) 569 #define __aarch64_vdupq_lane_s8(__a, __b) \ 570 __aarch64_vdup_lane_any (s8, q, , __a, __b) 571 #define __aarch64_vdupq_lane_s16(__a, __b) \ 572 __aarch64_vdup_lane_any (s16, q, , __a, __b) 573 #define __aarch64_vdupq_lane_s32(__a, __b) \ 574 __aarch64_vdup_lane_any (s32, q, , __a, __b) 575 #define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a)) 576 #define __aarch64_vdupq_lane_u8(__a, __b) \ 577 __aarch64_vdup_lane_any (u8, q, , __a, __b) 578 #define __aarch64_vdupq_lane_u16(__a, __b) \ 579 __aarch64_vdup_lane_any (u16, q, , __a, __b) 580 #define __aarch64_vdupq_lane_u32(__a, __b) \ 581 __aarch64_vdup_lane_any (u32, q, , __a, __b) 582 #define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a)) 583 584 /* __aarch64_vdupq_laneq internal macros. 
*/ 585 #define __aarch64_vdupq_laneq_f32(__a, __b) \ 586 __aarch64_vdup_lane_any (f32, q, q, __a, __b) 587 #define __aarch64_vdupq_laneq_f64(__a, __b) \ 588 __aarch64_vdup_lane_any (f64, q, q, __a, __b) 589 #define __aarch64_vdupq_laneq_p8(__a, __b) \ 590 __aarch64_vdup_lane_any (p8, q, q, __a, __b) 591 #define __aarch64_vdupq_laneq_p16(__a, __b) \ 592 __aarch64_vdup_lane_any (p16, q, q, __a, __b) 593 #define __aarch64_vdupq_laneq_s8(__a, __b) \ 594 __aarch64_vdup_lane_any (s8, q, q, __a, __b) 595 #define __aarch64_vdupq_laneq_s16(__a, __b) \ 596 __aarch64_vdup_lane_any (s16, q, q, __a, __b) 597 #define __aarch64_vdupq_laneq_s32(__a, __b) \ 598 __aarch64_vdup_lane_any (s32, q, q, __a, __b) 599 #define __aarch64_vdupq_laneq_s64(__a, __b) \ 600 __aarch64_vdup_lane_any (s64, q, q, __a, __b) 601 #define __aarch64_vdupq_laneq_u8(__a, __b) \ 602 __aarch64_vdup_lane_any (u8, q, q, __a, __b) 603 #define __aarch64_vdupq_laneq_u16(__a, __b) \ 604 __aarch64_vdup_lane_any (u16, q, q, __a, __b) 605 #define __aarch64_vdupq_laneq_u32(__a, __b) \ 606 __aarch64_vdup_lane_any (u32, q, q, __a, __b) 607 #define __aarch64_vdupq_laneq_u64(__a, __b) \ 608 __aarch64_vdup_lane_any (u64, q, q, __a, __b) 609 610 /* vadd */ 611 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 612 vadd_s8 (int8x8_t __a, int8x8_t __b) 613 { 614 return __a + __b; 615 } 616 617 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 618 vadd_s16 (int16x4_t __a, int16x4_t __b) 619 { 620 return __a + __b; 621 } 622 623 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 624 vadd_s32 (int32x2_t __a, int32x2_t __b) 625 { 626 return __a + __b; 627 } 628 629 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 630 vadd_f32 (float32x2_t __a, float32x2_t __b) 631 { 632 return __a + __b; 633 } 634 635 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) 636 vadd_f64 (float64x1_t __a, float64x1_t __b) 637 
{ 638 return __a + __b; 639 } 640 641 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 642 vadd_u8 (uint8x8_t __a, uint8x8_t __b) 643 { 644 return __a + __b; 645 } 646 647 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 648 vadd_u16 (uint16x4_t __a, uint16x4_t __b) 649 { 650 return __a + __b; 651 } 652 653 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 654 vadd_u32 (uint32x2_t __a, uint32x2_t __b) 655 { 656 return __a + __b; 657 } 658 659 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 660 vadd_s64 (int64x1_t __a, int64x1_t __b) 661 { 662 return __a + __b; 663 } 664 665 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 666 vadd_u64 (uint64x1_t __a, uint64x1_t __b) 667 { 668 return __a + __b; 669 } 670 671 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 672 vaddq_s8 (int8x16_t __a, int8x16_t __b) 673 { 674 return __a + __b; 675 } 676 677 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 678 vaddq_s16 (int16x8_t __a, int16x8_t __b) 679 { 680 return __a + __b; 681 } 682 683 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 684 vaddq_s32 (int32x4_t __a, int32x4_t __b) 685 { 686 return __a + __b; 687 } 688 689 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 690 vaddq_s64 (int64x2_t __a, int64x2_t __b) 691 { 692 return __a + __b; 693 } 694 695 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 696 vaddq_f32 (float32x4_t __a, float32x4_t __b) 697 { 698 return __a + __b; 699 } 700 701 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 702 vaddq_f64 (float64x2_t __a, float64x2_t __b) 703 { 704 return __a + __b; 705 } 706 707 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 708 vaddq_u8 (uint8x16_t __a, uint8x16_t __b) 709 { 710 return __a + __b; 711 
} 712 713 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 714 vaddq_u16 (uint16x8_t __a, uint16x8_t __b) 715 { 716 return __a + __b; 717 } 718 719 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 720 vaddq_u32 (uint32x4_t __a, uint32x4_t __b) 721 { 722 return __a + __b; 723 } 724 725 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 726 vaddq_u64 (uint64x2_t __a, uint64x2_t __b) 727 { 728 return __a + __b; 729 } 730 731 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 732 vaddl_s8 (int8x8_t __a, int8x8_t __b) 733 { 734 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b); 735 } 736 737 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 738 vaddl_s16 (int16x4_t __a, int16x4_t __b) 739 { 740 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b); 741 } 742 743 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 744 vaddl_s32 (int32x2_t __a, int32x2_t __b) 745 { 746 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b); 747 } 748 749 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 750 vaddl_u8 (uint8x8_t __a, uint8x8_t __b) 751 { 752 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a, 753 (int8x8_t) __b); 754 } 755 756 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 757 vaddl_u16 (uint16x4_t __a, uint16x4_t __b) 758 { 759 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a, 760 (int16x4_t) __b); 761 } 762 763 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 764 vaddl_u32 (uint32x2_t __a, uint32x2_t __b) 765 { 766 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a, 767 (int32x2_t) __b); 768 } 769 770 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 771 vaddl_high_s8 (int8x16_t __a, int8x16_t __b) 772 { 773 return (int16x8_t) 
__builtin_aarch64_saddl2v16qi (__a, __b); 774 } 775 776 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 777 vaddl_high_s16 (int16x8_t __a, int16x8_t __b) 778 { 779 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b); 780 } 781 782 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 783 vaddl_high_s32 (int32x4_t __a, int32x4_t __b) 784 { 785 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b); 786 } 787 788 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 789 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b) 790 { 791 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a, 792 (int8x16_t) __b); 793 } 794 795 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 796 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b) 797 { 798 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a, 799 (int16x8_t) __b); 800 } 801 802 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 803 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b) 804 { 805 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a, 806 (int32x4_t) __b); 807 } 808 809 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 810 vaddw_s8 (int16x8_t __a, int8x8_t __b) 811 { 812 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b); 813 } 814 815 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 816 vaddw_s16 (int32x4_t __a, int16x4_t __b) 817 { 818 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b); 819 } 820 821 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 822 vaddw_s32 (int64x2_t __a, int32x2_t __b) 823 { 824 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b); 825 } 826 827 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 828 vaddw_u8 (uint16x8_t __a, uint8x8_t __b) 829 { 830 return (uint16x8_t) __builtin_aarch64_uaddwv8qi 
((int16x8_t) __a, 831 (int8x8_t) __b); 832 } 833 834 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 835 vaddw_u16 (uint32x4_t __a, uint16x4_t __b) 836 { 837 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a, 838 (int16x4_t) __b); 839 } 840 841 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 842 vaddw_u32 (uint64x2_t __a, uint32x2_t __b) 843 { 844 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a, 845 (int32x2_t) __b); 846 } 847 848 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 849 vaddw_high_s8 (int16x8_t __a, int8x16_t __b) 850 { 851 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b); 852 } 853 854 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 855 vaddw_high_s16 (int32x4_t __a, int16x8_t __b) 856 { 857 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b); 858 } 859 860 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 861 vaddw_high_s32 (int64x2_t __a, int32x4_t __b) 862 { 863 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b); 864 } 865 866 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 867 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b) 868 { 869 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a, 870 (int8x16_t) __b); 871 } 872 873 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 874 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b) 875 { 876 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a, 877 (int16x8_t) __b); 878 } 879 880 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 881 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b) 882 { 883 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a, 884 (int32x4_t) __b); 885 } 886 887 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 888 vhadd_s8 (int8x8_t __a, int8x8_t __b) 
889 { 890 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b); 891 } 892 893 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 894 vhadd_s16 (int16x4_t __a, int16x4_t __b) 895 { 896 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b); 897 } 898 899 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 900 vhadd_s32 (int32x2_t __a, int32x2_t __b) 901 { 902 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b); 903 } 904 905 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 906 vhadd_u8 (uint8x8_t __a, uint8x8_t __b) 907 { 908 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a, 909 (int8x8_t) __b); 910 } 911 912 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 913 vhadd_u16 (uint16x4_t __a, uint16x4_t __b) 914 { 915 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a, 916 (int16x4_t) __b); 917 } 918 919 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 920 vhadd_u32 (uint32x2_t __a, uint32x2_t __b) 921 { 922 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a, 923 (int32x2_t) __b); 924 } 925 926 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 927 vhaddq_s8 (int8x16_t __a, int8x16_t __b) 928 { 929 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b); 930 } 931 932 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 933 vhaddq_s16 (int16x8_t __a, int16x8_t __b) 934 { 935 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b); 936 } 937 938 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 939 vhaddq_s32 (int32x4_t __a, int32x4_t __b) 940 { 941 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b); 942 } 943 944 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 945 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) 946 { 947 return (uint8x16_t) __builtin_aarch64_uhaddv16qi 
((int8x16_t) __a, 948 (int8x16_t) __b); 949 } 950 951 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 952 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) 953 { 954 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a, 955 (int16x8_t) __b); 956 } 957 958 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 959 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) 960 { 961 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a, 962 (int32x4_t) __b); 963 } 964 965 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 966 vrhadd_s8 (int8x8_t __a, int8x8_t __b) 967 { 968 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b); 969 } 970 971 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 972 vrhadd_s16 (int16x4_t __a, int16x4_t __b) 973 { 974 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b); 975 } 976 977 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 978 vrhadd_s32 (int32x2_t __a, int32x2_t __b) 979 { 980 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b); 981 } 982 983 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 984 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) 985 { 986 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a, 987 (int8x8_t) __b); 988 } 989 990 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 991 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) 992 { 993 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a, 994 (int16x4_t) __b); 995 } 996 997 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 998 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) 999 { 1000 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a, 1001 (int32x2_t) __b); 1002 } 1003 1004 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 1005 vrhaddq_s8 (int8x16_t __a, int8x16_t __b) 1006 { 1007 return 
(int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
}

/* vrhaddq_<t>: 128-bit vector rounding halving add (ACLE vrhaddq).
   Each intrinsic is a thin wrapper over the matching GCC
   __builtin_aarch64_[su]rhadd<mode> builtin.  The unsigned variants
   cast their operands to the signed vector types the u* builtins are
   declared with; this only satisfies the builtin's prototype — the
   urhadd builtin itself performs the unsigned arithmetic.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

/* vaddhn_<t>: add and narrow, returning the high half of each lane
   (ACLE vaddhn) — wraps __builtin_aarch64_addhn<mode>.  Result lanes
   are half the width of the operands.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
}

/* Unsigned narrowing add: same builtin as the signed forms (the
   high-half-of-sum bit pattern is sign-agnostic); casts only adapt
   to the builtin's signed prototype.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

/* vraddhn_<t>: rounding variant of the narrowing add (ACLE vraddhn)
   — wraps __builtin_aarch64_raddhn<mode>.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vraddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vraddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vraddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}

/* v(r)addhn_high_<t>: narrowing add writing the upper half of a
   128-bit result; __a supplies the lower half (ACLE vaddhn_high /
   vraddhn_high) — wraps the addhn2/raddhn2 builtins.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
						    (int16x8_t) __b,
						    (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
						    (int32x4_t) __b,
						    (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
						    (int64x2_t) __b,
						    (int64x2_t) __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
						     (int16x8_t) __b,
						     (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
						     (int32x4_t) __b,
						     (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
						     (int64x2_t) __b,
						     (int64x2_t) __c);
}

/* vdiv(q)_f<n>: lane-wise floating-point division, expressed with
   GCC's native vector operators.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdiv_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdiv_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a / __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdivq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdivq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a / __b;
}

/* vmul(q)_<t>: lane-wise multiply via native vector operators;
   integer lanes wrap modulo 2^width (vector arithmetic, not the
   saturating forms).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmul_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a * __b;
}

/* Lane-wise multiplies via GCC's native vector operators; the
   polynomial (p8) forms must instead use the dedicated pmul builtin,
   since poly multiply is carry-less, not integer multiply.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmul_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a * __b;
}

/* Polynomial multiply: casts adapt poly vectors to the builtin's
   signed prototype only; semantics come from the pmul builtin.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmul_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmulq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

/* vand(q)_<t>: lane-wise bitwise AND via native vector operators.
   The 64-bit x1 forms operate on the scalar int64_t/uint64_t
   typedefs, so plain scalar & applies.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vand_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vand_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vand_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vand_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vand_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vand_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vand_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vand_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vandq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vandq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vandq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vandq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vandq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vandq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vandq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vandq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & __b;
}

/* vorr(q)_<t>: lane-wise bitwise OR via native vector operators.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorr_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorr_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorr_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorr_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorr_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorr_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorr_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorr_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vorrq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vorrq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vorrq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vorrq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | __b;
}

__extension__
static __inline uint32x4_t __attribute__ ((__always_inline__))
vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | __b;
}

/* veor(q)_<t>: lane-wise bitwise exclusive OR via native vector
   operators; the 64-bit x1 forms are plain scalar XOR on the
   int64_t/uint64_t typedefs.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
veor_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
veor_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
veor_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
veor_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
veor_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
veor_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
veor_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
veor_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
veorq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
veorq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
veorq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
veorq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
veorq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
veorq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
veorq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
veorq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a ^ __b;
}

/* vbic(q)_<t>: bit clear — AND of __a with the complement of __b,
   written directly as __a & ~__b.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbic_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbic_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbic_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbic_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbic_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbic_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbic_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbic_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbicq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbicq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbicq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbicq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & ~__b;
}

/* vorn(q)_<t>: OR-NOT — OR of __a with the complement of __b,
   written directly as __a | ~__b.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorn_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorn_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorn_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorn_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorn_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorn_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorn_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorn_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vornq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vornq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vornq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vornq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vornq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vornq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vornq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vornq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | ~__b;
}

/* vsub(q)_<t>: lane-wise subtraction via native vector operators;
   integer lanes wrap modulo 2^width (non-saturating).  The 64-bit
   x1 forms are scalar subtraction on the int64_t/uint64_t
   typedefs.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vsub_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vsub_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsubq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsubq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a - __b;
}

/* vsubl_<t>: widening subtract of two 64-bit vectors into lanes of
   twice the width — wraps __builtin_aarch64_[su]subl<mode>.
   Unsigned forms cast to the signed prototype of the u* builtin;
   the builtin supplies the unsigned (zero-extending) semantics.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

/* vsubl_high_<t>: widening subtract using the upper halves of two
   128-bit operands — wraps the [su]subl2 builtins.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

/* vsubw_<t>: wide subtract — wide __a minus widened narrow __b;
   wraps __builtin_aarch64_[su]subw<mode>.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubwv2si
((int64x2_t) __a,
						   (int32x2_t) __b);
}

/* vsubw_high_<t>: wide subtract using the upper half of the narrow
   128-bit operand — wraps the [su]subw2 builtins.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}

/* vqadd(q)_<t>: saturating add — wraps __builtin_aarch64_[su]qadd*.
   The 64-bit x1 forms use the scalar "di"-mode builtin on the
   int64_t/uint64_t typedefs.  Unsigned forms cast to the builtin's
   signed prototype only; saturation semantics come from the
   builtin.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
						 (int64x1_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

/* vqsub(q)_<t>: saturating subtract — wraps
   __builtin_aarch64_[su]qsub*; same cast convention as vqadd.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
						 (int64x1_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline
uint16x8_t __attribute__ ((__always_inline__)) 2277 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) 2278 { 2279 return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a, 2280 (int16x8_t) __b); 2281 } 2282 2283 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 2284 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) 2285 { 2286 return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a, 2287 (int32x4_t) __b); 2288 } 2289 2290 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 2291 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) 2292 { 2293 return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a, 2294 (int64x2_t) __b); 2295 } 2296 2297 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 2298 vqneg_s8 (int8x8_t __a) 2299 { 2300 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a); 2301 } 2302 2303 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 2304 vqneg_s16 (int16x4_t __a) 2305 { 2306 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a); 2307 } 2308 2309 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 2310 vqneg_s32 (int32x2_t __a) 2311 { 2312 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a); 2313 } 2314 2315 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 2316 vqnegq_s8 (int8x16_t __a) 2317 { 2318 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a); 2319 } 2320 2321 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 2322 vqnegq_s16 (int16x8_t __a) 2323 { 2324 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a); 2325 } 2326 2327 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 2328 vqnegq_s32 (int32x4_t __a) 2329 { 2330 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a); 2331 } 2332 2333 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 2334 vqabs_s8 (int8x8_t __a) 2335 { 2336 return (int8x8_t) 
__builtin_aarch64_sqabsv8qi (__a); 2337 } 2338 2339 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 2340 vqabs_s16 (int16x4_t __a) 2341 { 2342 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a); 2343 } 2344 2345 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 2346 vqabs_s32 (int32x2_t __a) 2347 { 2348 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a); 2349 } 2350 2351 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 2352 vqabsq_s8 (int8x16_t __a) 2353 { 2354 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a); 2355 } 2356 2357 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 2358 vqabsq_s16 (int16x8_t __a) 2359 { 2360 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a); 2361 } 2362 2363 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 2364 vqabsq_s32 (int32x4_t __a) 2365 { 2366 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a); 2367 } 2368 2369 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 2370 vqdmulh_s16 (int16x4_t __a, int16x4_t __b) 2371 { 2372 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b); 2373 } 2374 2375 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 2376 vqdmulh_s32 (int32x2_t __a, int32x2_t __b) 2377 { 2378 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b); 2379 } 2380 2381 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 2382 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b) 2383 { 2384 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b); 2385 } 2386 2387 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 2388 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b) 2389 { 2390 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b); 2391 } 2392 2393 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 2394 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b) 2395 { 2396 
return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b); 2397 } 2398 2399 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 2400 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b) 2401 { 2402 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b); 2403 } 2404 2405 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 2406 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b) 2407 { 2408 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b); 2409 } 2410 2411 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 2412 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) 2413 { 2414 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b); 2415 } 2416 2417 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 2418 vcreate_s8 (uint64_t __a) 2419 { 2420 return (int8x8_t) __a; 2421 } 2422 2423 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 2424 vcreate_s16 (uint64_t __a) 2425 { 2426 return (int16x4_t) __a; 2427 } 2428 2429 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 2430 vcreate_s32 (uint64_t __a) 2431 { 2432 return (int32x2_t) __a; 2433 } 2434 2435 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 2436 vcreate_s64 (uint64_t __a) 2437 { 2438 return (int64x1_t) __a; 2439 } 2440 2441 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 2442 vcreate_f32 (uint64_t __a) 2443 { 2444 return (float32x2_t) __a; 2445 } 2446 2447 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 2448 vcreate_u8 (uint64_t __a) 2449 { 2450 return (uint8x8_t) __a; 2451 } 2452 2453 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 2454 vcreate_u16 (uint64_t __a) 2455 { 2456 return (uint16x4_t) __a; 2457 } 2458 2459 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 2460 vcreate_u32 (uint64_t __a) 2461 { 2462 return (uint32x2_t) 
__a; 2463 } 2464 2465 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 2466 vcreate_u64 (uint64_t __a) 2467 { 2468 return (uint64x1_t) __a; 2469 } 2470 2471 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) 2472 vcreate_f64 (uint64_t __a) 2473 { 2474 return (float64x1_t) __builtin_aarch64_createdf (__a); 2475 } 2476 2477 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 2478 vcreate_p8 (uint64_t __a) 2479 { 2480 return (poly8x8_t) __a; 2481 } 2482 2483 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 2484 vcreate_p16 (uint64_t __a) 2485 { 2486 return (poly16x4_t) __a; 2487 } 2488 2489 /* vget_lane */ 2490 2491 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 2492 vget_lane_f32 (float32x2_t __a, const int __b) 2493 { 2494 return __aarch64_vget_lane_f32 (__a, __b); 2495 } 2496 2497 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 2498 vget_lane_f64 (float64x1_t __a, const int __b) 2499 { 2500 return __aarch64_vget_lane_f64 (__a, __b); 2501 } 2502 2503 __extension__ static __inline poly8_t __attribute__ ((__always_inline__)) 2504 vget_lane_p8 (poly8x8_t __a, const int __b) 2505 { 2506 return __aarch64_vget_lane_p8 (__a, __b); 2507 } 2508 2509 __extension__ static __inline poly16_t __attribute__ ((__always_inline__)) 2510 vget_lane_p16 (poly16x4_t __a, const int __b) 2511 { 2512 return __aarch64_vget_lane_p16 (__a, __b); 2513 } 2514 2515 __extension__ static __inline int8_t __attribute__ ((__always_inline__)) 2516 vget_lane_s8 (int8x8_t __a, const int __b) 2517 { 2518 return __aarch64_vget_lane_s8 (__a, __b); 2519 } 2520 2521 __extension__ static __inline int16_t __attribute__ ((__always_inline__)) 2522 vget_lane_s16 (int16x4_t __a, const int __b) 2523 { 2524 return __aarch64_vget_lane_s16 (__a, __b); 2525 } 2526 2527 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 2528 
vget_lane_s32 (int32x2_t __a, const int __b) 2529 { 2530 return __aarch64_vget_lane_s32 (__a, __b); 2531 } 2532 2533 __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 2534 vget_lane_s64 (int64x1_t __a, const int __b) 2535 { 2536 return __aarch64_vget_lane_s64 (__a, __b); 2537 } 2538 2539 __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) 2540 vget_lane_u8 (uint8x8_t __a, const int __b) 2541 { 2542 return __aarch64_vget_lane_u8 (__a, __b); 2543 } 2544 2545 __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) 2546 vget_lane_u16 (uint16x4_t __a, const int __b) 2547 { 2548 return __aarch64_vget_lane_u16 (__a, __b); 2549 } 2550 2551 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 2552 vget_lane_u32 (uint32x2_t __a, const int __b) 2553 { 2554 return __aarch64_vget_lane_u32 (__a, __b); 2555 } 2556 2557 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 2558 vget_lane_u64 (uint64x1_t __a, const int __b) 2559 { 2560 return __aarch64_vget_lane_u64 (__a, __b); 2561 } 2562 2563 /* vgetq_lane */ 2564 2565 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 2566 vgetq_lane_f32 (float32x4_t __a, const int __b) 2567 { 2568 return __aarch64_vgetq_lane_f32 (__a, __b); 2569 } 2570 2571 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 2572 vgetq_lane_f64 (float64x2_t __a, const int __b) 2573 { 2574 return __aarch64_vgetq_lane_f64 (__a, __b); 2575 } 2576 2577 __extension__ static __inline poly8_t __attribute__ ((__always_inline__)) 2578 vgetq_lane_p8 (poly8x16_t __a, const int __b) 2579 { 2580 return __aarch64_vgetq_lane_p8 (__a, __b); 2581 } 2582 2583 __extension__ static __inline poly16_t __attribute__ ((__always_inline__)) 2584 vgetq_lane_p16 (poly16x8_t __a, const int __b) 2585 { 2586 return __aarch64_vgetq_lane_p16 (__a, __b); 2587 } 2588 2589 __extension__ static __inline int8_t __attribute__ 
((__always_inline__)) 2590 vgetq_lane_s8 (int8x16_t __a, const int __b) 2591 { 2592 return __aarch64_vgetq_lane_s8 (__a, __b); 2593 } 2594 2595 __extension__ static __inline int16_t __attribute__ ((__always_inline__)) 2596 vgetq_lane_s16 (int16x8_t __a, const int __b) 2597 { 2598 return __aarch64_vgetq_lane_s16 (__a, __b); 2599 } 2600 2601 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 2602 vgetq_lane_s32 (int32x4_t __a, const int __b) 2603 { 2604 return __aarch64_vgetq_lane_s32 (__a, __b); 2605 } 2606 2607 __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 2608 vgetq_lane_s64 (int64x2_t __a, const int __b) 2609 { 2610 return __aarch64_vgetq_lane_s64 (__a, __b); 2611 } 2612 2613 __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) 2614 vgetq_lane_u8 (uint8x16_t __a, const int __b) 2615 { 2616 return __aarch64_vgetq_lane_u8 (__a, __b); 2617 } 2618 2619 __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) 2620 vgetq_lane_u16 (uint16x8_t __a, const int __b) 2621 { 2622 return __aarch64_vgetq_lane_u16 (__a, __b); 2623 } 2624 2625 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 2626 vgetq_lane_u32 (uint32x4_t __a, const int __b) 2627 { 2628 return __aarch64_vgetq_lane_u32 (__a, __b); 2629 } 2630 2631 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 2632 vgetq_lane_u64 (uint64x2_t __a, const int __b) 2633 { 2634 return __aarch64_vgetq_lane_u64 (__a, __b); 2635 } 2636 2637 /* vreinterpret */ 2638 2639 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 2640 vreinterpret_p8_s8 (int8x8_t __a) 2641 { 2642 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); 2643 } 2644 2645 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 2646 vreinterpret_p8_s16 (int16x4_t __a) 2647 { 2648 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); 2649 } 2650 2651 __extension__ 
static __inline poly8x8_t __attribute__ ((__always_inline__)) 2652 vreinterpret_p8_s32 (int32x2_t __a) 2653 { 2654 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); 2655 } 2656 2657 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 2658 vreinterpret_p8_s64 (int64x1_t __a) 2659 { 2660 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); 2661 } 2662 2663 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 2664 vreinterpret_p8_f32 (float32x2_t __a) 2665 { 2666 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); 2667 } 2668 2669 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 2670 vreinterpret_p8_u8 (uint8x8_t __a) 2671 { 2672 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); 2673 } 2674 2675 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 2676 vreinterpret_p8_u16 (uint16x4_t __a) 2677 { 2678 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); 2679 } 2680 2681 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 2682 vreinterpret_p8_u32 (uint32x2_t __a) 2683 { 2684 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); 2685 } 2686 2687 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 2688 vreinterpret_p8_u64 (uint64x1_t __a) 2689 { 2690 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); 2691 } 2692 2693 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 2694 vreinterpret_p8_p16 (poly16x4_t __a) 2695 { 2696 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); 2697 } 2698 2699 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 2700 vreinterpretq_p8_s8 (int8x16_t __a) 2701 { 2702 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); 2703 } 2704 2705 __extension__ static __inline poly8x16_t __attribute__ 
((__always_inline__)) 2706 vreinterpretq_p8_s16 (int16x8_t __a) 2707 { 2708 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); 2709 } 2710 2711 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 2712 vreinterpretq_p8_s32 (int32x4_t __a) 2713 { 2714 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); 2715 } 2716 2717 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 2718 vreinterpretq_p8_s64 (int64x2_t __a) 2719 { 2720 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); 2721 } 2722 2723 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 2724 vreinterpretq_p8_f32 (float32x4_t __a) 2725 { 2726 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); 2727 } 2728 2729 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 2730 vreinterpretq_p8_u8 (uint8x16_t __a) 2731 { 2732 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) 2733 __a); 2734 } 2735 2736 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 2737 vreinterpretq_p8_u16 (uint16x8_t __a) 2738 { 2739 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) 2740 __a); 2741 } 2742 2743 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 2744 vreinterpretq_p8_u32 (uint32x4_t __a) 2745 { 2746 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) 2747 __a); 2748 } 2749 2750 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 2751 vreinterpretq_p8_u64 (uint64x2_t __a) 2752 { 2753 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) 2754 __a); 2755 } 2756 2757 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 2758 vreinterpretq_p8_p16 (poly16x8_t __a) 2759 { 2760 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) 2761 __a); 2762 } 2763 2764 __extension__ static __inline 
poly16x4_t __attribute__ ((__always_inline__)) 2765 vreinterpret_p16_s8 (int8x8_t __a) 2766 { 2767 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); 2768 } 2769 2770 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 2771 vreinterpret_p16_s16 (int16x4_t __a) 2772 { 2773 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); 2774 } 2775 2776 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 2777 vreinterpret_p16_s32 (int32x2_t __a) 2778 { 2779 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); 2780 } 2781 2782 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 2783 vreinterpret_p16_s64 (int64x1_t __a) 2784 { 2785 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); 2786 } 2787 2788 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 2789 vreinterpret_p16_f32 (float32x2_t __a) 2790 { 2791 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); 2792 } 2793 2794 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 2795 vreinterpret_p16_u8 (uint8x8_t __a) 2796 { 2797 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); 2798 } 2799 2800 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 2801 vreinterpret_p16_u16 (uint16x4_t __a) 2802 { 2803 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); 2804 } 2805 2806 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 2807 vreinterpret_p16_u32 (uint32x2_t __a) 2808 { 2809 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); 2810 } 2811 2812 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 2813 vreinterpret_p16_u64 (uint64x1_t __a) 2814 { 2815 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); 2816 } 2817 2818 __extension__ static __inline poly16x4_t __attribute__ 
((__always_inline__)) 2819 vreinterpret_p16_p8 (poly8x8_t __a) 2820 { 2821 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); 2822 } 2823 2824 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 2825 vreinterpretq_p16_s8 (int8x16_t __a) 2826 { 2827 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); 2828 } 2829 2830 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 2831 vreinterpretq_p16_s16 (int16x8_t __a) 2832 { 2833 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); 2834 } 2835 2836 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 2837 vreinterpretq_p16_s32 (int32x4_t __a) 2838 { 2839 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); 2840 } 2841 2842 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 2843 vreinterpretq_p16_s64 (int64x2_t __a) 2844 { 2845 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); 2846 } 2847 2848 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 2849 vreinterpretq_p16_f32 (float32x4_t __a) 2850 { 2851 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); 2852 } 2853 2854 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 2855 vreinterpretq_p16_u8 (uint8x16_t __a) 2856 { 2857 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) 2858 __a); 2859 } 2860 2861 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 2862 vreinterpretq_p16_u16 (uint16x8_t __a) 2863 { 2864 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); 2865 } 2866 2867 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 2868 vreinterpretq_p16_u32 (uint32x4_t __a) 2869 { 2870 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); 2871 } 2872 2873 __extension__ static __inline poly16x8_t __attribute__ 
((__always_inline__)) 2874 vreinterpretq_p16_u64 (uint64x2_t __a) 2875 { 2876 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); 2877 } 2878 2879 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 2880 vreinterpretq_p16_p8 (poly8x16_t __a) 2881 { 2882 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) 2883 __a); 2884 } 2885 2886 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 2887 vreinterpret_f32_s8 (int8x8_t __a) 2888 { 2889 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a); 2890 } 2891 2892 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 2893 vreinterpret_f32_s16 (int16x4_t __a) 2894 { 2895 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a); 2896 } 2897 2898 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 2899 vreinterpret_f32_s32 (int32x2_t __a) 2900 { 2901 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a); 2902 } 2903 2904 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 2905 vreinterpret_f32_s64 (int64x1_t __a) 2906 { 2907 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a); 2908 } 2909 2910 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 2911 vreinterpret_f32_u8 (uint8x8_t __a) 2912 { 2913 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); 2914 } 2915 2916 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 2917 vreinterpret_f32_u16 (uint16x4_t __a) 2918 { 2919 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) 2920 __a); 2921 } 2922 2923 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 2924 vreinterpret_f32_u32 (uint32x2_t __a) 2925 { 2926 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t) 2927 __a); 2928 } 2929 2930 __extension__ static __inline float32x2_t 
__attribute__ ((__always_inline__)) 2931 vreinterpret_f32_u64 (uint64x1_t __a) 2932 { 2933 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a); 2934 } 2935 2936 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 2937 vreinterpret_f32_p8 (poly8x8_t __a) 2938 { 2939 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); 2940 } 2941 2942 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 2943 vreinterpret_f32_p16 (poly16x4_t __a) 2944 { 2945 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) 2946 __a); 2947 } 2948 2949 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 2950 vreinterpretq_f32_s8 (int8x16_t __a) 2951 { 2952 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a); 2953 } 2954 2955 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 2956 vreinterpretq_f32_s16 (int16x8_t __a) 2957 { 2958 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a); 2959 } 2960 2961 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 2962 vreinterpretq_f32_s32 (int32x4_t __a) 2963 { 2964 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a); 2965 } 2966 2967 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 2968 vreinterpretq_f32_s64 (int64x2_t __a) 2969 { 2970 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a); 2971 } 2972 2973 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 2974 vreinterpretq_f32_u8 (uint8x16_t __a) 2975 { 2976 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) 2977 __a); 2978 } 2979 2980 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 2981 vreinterpretq_f32_u16 (uint16x8_t __a) 2982 { 2983 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) 2984 __a); 2985 } 2986 2987 __extension__ static 
__inline float32x4_t __attribute__ ((__always_inline__)) 2988 vreinterpretq_f32_u32 (uint32x4_t __a) 2989 { 2990 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t) 2991 __a); 2992 } 2993 2994 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 2995 vreinterpretq_f32_u64 (uint64x2_t __a) 2996 { 2997 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t) 2998 __a); 2999 } 3000 3001 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 3002 vreinterpretq_f32_p8 (poly8x16_t __a) 3003 { 3004 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) 3005 __a); 3006 } 3007 3008 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 3009 vreinterpretq_f32_p16 (poly16x8_t __a) 3010 { 3011 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) 3012 __a); 3013 } 3014 3015 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 3016 vreinterpret_s64_s8 (int8x8_t __a) 3017 { 3018 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); 3019 } 3020 3021 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 3022 vreinterpret_s64_s16 (int16x4_t __a) 3023 { 3024 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); 3025 } 3026 3027 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 3028 vreinterpret_s64_s32 (int32x2_t __a) 3029 { 3030 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); 3031 } 3032 3033 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 3034 vreinterpret_s64_f32 (float32x2_t __a) 3035 { 3036 return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); 3037 } 3038 3039 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 3040 vreinterpret_s64_u8 (uint8x8_t __a) 3041 { 3042 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); 3043 } 3044 3045 __extension__ static __inline 
int64x1_t __attribute__ ((__always_inline__)) 3046 vreinterpret_s64_u16 (uint16x4_t __a) 3047 { 3048 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); 3049 } 3050 3051 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 3052 vreinterpret_s64_u32 (uint32x2_t __a) 3053 { 3054 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); 3055 } 3056 3057 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 3058 vreinterpret_s64_u64 (uint64x1_t __a) 3059 { 3060 return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a); 3061 } 3062 3063 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 3064 vreinterpret_s64_p8 (poly8x8_t __a) 3065 { 3066 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); 3067 } 3068 3069 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 3070 vreinterpret_s64_p16 (poly16x4_t __a) 3071 { 3072 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); 3073 } 3074 3075 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 3076 vreinterpretq_s64_s8 (int8x16_t __a) 3077 { 3078 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); 3079 } 3080 3081 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 3082 vreinterpretq_s64_s16 (int16x8_t __a) 3083 { 3084 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); 3085 } 3086 3087 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 3088 vreinterpretq_s64_s32 (int32x4_t __a) 3089 { 3090 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); 3091 } 3092 3093 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 3094 vreinterpretq_s64_f32 (float32x4_t __a) 3095 { 3096 return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); 3097 } 3098 3099 __extension__ static __inline int64x2_t __attribute__ 
((__always_inline__))
/* NOTE(review): the attribute list above completes the declaration of
   vreinterpretq_s64_u8, which begins before this chunk.

   vreinterpret{,q}_X_Y family: reinterpret the bits of a vector of type Y
   as a vector of type X of the same total size (64-bit "d" forms, 128-bit
   "q" forms).  Each wrapper forwards to a __builtin_aarch64_reinterpret*
   builtin; the builtins are declared on the signed element types, so
   unsigned/poly arguments are cast to the matching signed vector type on
   the way in and the result is cast back on the way out.  */
vreinterpretq_s64_u8 (uint8x16_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u16 (uint16x8_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u32 (uint32x4_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u64 (uint64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p8 (poly8x16_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p16 (poly16x8_t __a)
{
  return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
}

/* vreinterpret_u64_*: 64-bit vectors viewed as uint64x1_t (a scalar
   typedef of uint64_t in this header).  */

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s8 (int8x8_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s16 (int16x4_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s32 (int32x2_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s64 (int64x1_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f32 (float32x2_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u8 (uint8x8_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u16 (uint16x4_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u32 (uint32x2_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p8 (poly8x8_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p16 (poly16x4_t __a)
{
  return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
}

/* vreinterpretq_u64_*: 128-bit vectors viewed as uint64x2_t.  */

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s8 (int8x16_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s16 (int16x8_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s32 (int32x4_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s64 (int64x2_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f32 (float32x4_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u8 (uint8x16_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u16 (uint16x8_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u32 (uint32x4_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p8 (poly8x16_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p16 (poly16x8_t __a)
{
  return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
}

/* vreinterpret_s8_*: 64-bit vectors viewed as int8x8_t.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s16 (int16x4_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s32 (int32x2_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s64 (int64x1_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f32 (float32x2_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u8 (uint8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u16 (uint16x4_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u32 (uint32x2_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u64 (uint64x1_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p8 (poly8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p16 (poly16x4_t __a)
{
  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
}

/* vreinterpretq_s8_*: 128-bit vectors viewed as int8x16_t.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s16 (int16x8_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s32 (int32x4_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s64 (int64x2_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f32 (float32x4_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u8 (uint8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u16 (uint16x8_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u32 (uint32x4_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u64 (uint64x2_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p8 (poly8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p16 (poly16x8_t __a)
{
  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
}

/* vreinterpret_s16_*: 64-bit vectors viewed as int16x4_t.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s8 (int8x8_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s32 (int32x2_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s64 (int64x1_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f32 (float32x2_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u8 (uint8x8_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u16 (uint16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u32 (uint32x2_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u64 (uint64x1_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p8 (poly8x8_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p16 (poly16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
}

/* vreinterpretq_s16_*: 128-bit vectors viewed as int16x8_t.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s8 (int8x16_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s32 (int32x4_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s64 (int64x2_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f32 (float32x4_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u8 (uint8x16_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u16 (uint16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u32 (uint32x4_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u64 (uint64x2_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p8 (poly8x16_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p16 (poly16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
}

/* vreinterpret_s32_*: 64-bit vectors viewed as int32x2_t.  Each wrapper
   forwards to a __builtin_aarch64_reinterpret* builtin declared on the
   signed element types, casting unsigned/poly operands accordingly.  */

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s8 (int8x8_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s16 (int16x4_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s64 (int64x1_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f32 (float32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u8 (uint8x8_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u16 (uint16x4_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u32 (uint32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u64 (uint64x1_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p8 (poly8x8_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p16 (poly16x4_t __a)
{
  return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
}

/* vreinterpretq_s32_*: 128-bit vectors viewed as int32x4_t.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s8 (int8x16_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s16 (int16x8_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s64 (int64x2_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f32 (float32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u8 (uint8x16_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u16 (uint16x8_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u32 (uint32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u64 (uint64x2_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p8 (poly8x16_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p16 (poly16x8_t __a)
{
  return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
}

/* vreinterpret_u8_*: 64-bit vectors viewed as uint8x8_t.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s8 (int8x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s16 (int16x4_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s32 (int32x2_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s64 (int64x1_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f32 (float32x2_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u16 (uint16x4_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u32 (uint32x2_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u64 (uint64x1_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p8 (poly8x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p16 (poly16x4_t __a)
{
  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
}

/* vreinterpretq_u8_*: 128-bit vectors viewed as uint8x16_t.  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s8 (int8x16_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s16 (int16x8_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s32 (int32x4_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s64 (int64x2_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f32 (float32x4_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u16 (uint16x8_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
							      __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u32 (uint32x4_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
							      __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u64 (uint64x2_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
							      __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p8 (poly8x16_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
							       __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p16 (poly16x8_t __a)
{
  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
							      __a);
}

/* vreinterpret_u16_*: 64-bit vectors viewed as uint16x4_t.  */

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s8 (int8x8_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s16 (int16x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s32 (int32x2_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s64 (int64x1_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f32 (float32x2_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u8 (uint8x8_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u32 (uint32x2_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u64 (uint64x1_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p8 (poly8x8_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p16 (poly16x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
}

/* vreinterpretq_u16_*: 128-bit vectors viewed as uint16x8_t.  */

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s8 (int8x16_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s16 (int16x8_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s32 (int32x4_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s64 (int64x2_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f32 (float32x4_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u8 (uint8x16_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u32 (uint32x4_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u64 (uint64x2_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p8 (poly8x16_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p16 (poly16x8_t __a)
{
  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
}

/* vreinterpret_u32_*: 64-bit vectors viewed as uint32x2_t.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s8 (int8x8_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s16 (int16x4_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s32 (int32x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s64 (int64x1_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f32 (float32x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u8 (uint8x8_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u16 (uint16x4_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u64 (uint64x1_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p8 (poly8x8_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p16 (poly16x4_t __a)
{
  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
}

/* vreinterpretq_u32_*: 128-bit vectors viewed as uint32x4_t.  */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s8 (int8x16_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s16 (int16x8_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s32 (int32x4_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s64 (int64x2_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f32 (float32x4_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u8 (uint8x16_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u16 (uint16x8_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u64 (uint64x2_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p8 (poly8x16_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
							      __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p16 (poly16x8_t __a)
{
  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
}

/* Helper for the vget_low_* family: reinterpret the 128-bit argument as
   uint64x2_t, extract lane 0 (the low 64 bits), then reinterpret back to
   the requested 64-bit vector type.  Deliberately NOT wrapped in
   do { } while (0): it expands to declarations plus a `return' inside the
   caller's body.  Only valid as the sole statement of a vget_low_*
   function; #undef'd immediately after its last use below.  */
#define __GET_LOW(__TYPE) \
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
  uint64_t lo = vgetq_lane_u64 (tmp, 0);  \
  return vreinterpret_##__TYPE##_u64 (lo);

/* vget_low_*: return the low (least-significant) half of a 128-bit
   vector.  The f64/s64/u64 variants read lane 0 directly because the
   corresponding 64x1 types are scalar typedefs in this header.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_low_f32 (float32x4_t __a)
{
  __GET_LOW (f32);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vget_low_f64 (float64x2_t __a)
{
  return vgetq_lane_f64 (__a, 0);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vget_low_p8 (poly8x16_t __a)
{
  __GET_LOW (p8);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vget_low_p16 (poly16x8_t __a)
{
  __GET_LOW (p16);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_low_s8 (int8x16_t __a)
{
  __GET_LOW (s8);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vget_low_s16 (int16x8_t __a)
{
  __GET_LOW (s16);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vget_low_s32 (int32x4_t __a)
{
  __GET_LOW (s32);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_low_s64 (int64x2_t __a)
{
  return vgetq_lane_s64 (__a, 0);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vget_low_u8 (uint8x16_t __a)
{
  __GET_LOW (u8);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vget_low_u16 (uint16x8_t __a)
{
  __GET_LOW (u16);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vget_low_u32 (uint32x4_t __a)
{
  __GET_LOW (u32);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vget_low_u64 (uint64x2_t __a)
{
  return vgetq_lane_u64 (__a, 0);
}

#undef __GET_LOW

/* vcombine_*: join two 64-bit vectors into one 128-bit vector (__a in
   the low half, __b in the high half — the order the combine builtins
   receive them).  The builtins are declared on signed element types, so
   unsigned/poly operands are cast through them.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcombine_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcombine_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcombine_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcombine_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcombine_f32 (float32x2_t __a, float32x2_t __b)
{
  return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
						     (int8x8_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
						     (int16x4_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
						     (int32x2_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
						   (int64x1_t) __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcombine_f64 (float64x1_t __a, float64x1_t __b)
{
  return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
						     (int8x8_t) __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
{
  return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
						     (int16x4_t) __b);
}

/* Start of temporary inline asm implementations. */

/* vaba_*: emit the SABA/UABA (signed/unsigned absolute difference and
   accumulate) instruction via inline asm.  Constraint "0" ties the
   accumulator input `a' to the output register %0, so the instruction
   adds into it; `b' and `c' feed operands %2 and %3.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("saba %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("saba %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("saba %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("uaba %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("uaba %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("uaba %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* NOTE(review): vabal_high_s8 continues past this chunk; its declaration
   is intentionally left open here.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabal_high_s8 (int16x8_t
a, int8x16_t b, int8x16_t c) 4215 { 4216 int16x8_t result; 4217 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b" 4218 : "=w"(result) 4219 : "0"(a), "w"(b), "w"(c) 4220 : /* No clobbers */); 4221 return result; 4222 } 4223 4224 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 4225 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) 4226 { 4227 int32x4_t result; 4228 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h" 4229 : "=w"(result) 4230 : "0"(a), "w"(b), "w"(c) 4231 : /* No clobbers */); 4232 return result; 4233 } 4234 4235 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 4236 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) 4237 { 4238 int64x2_t result; 4239 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s" 4240 : "=w"(result) 4241 : "0"(a), "w"(b), "w"(c) 4242 : /* No clobbers */); 4243 return result; 4244 } 4245 4246 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 4247 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) 4248 { 4249 uint16x8_t result; 4250 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b" 4251 : "=w"(result) 4252 : "0"(a), "w"(b), "w"(c) 4253 : /* No clobbers */); 4254 return result; 4255 } 4256 4257 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 4258 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) 4259 { 4260 uint32x4_t result; 4261 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h" 4262 : "=w"(result) 4263 : "0"(a), "w"(b), "w"(c) 4264 : /* No clobbers */); 4265 return result; 4266 } 4267 4268 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 4269 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) 4270 { 4271 uint64x2_t result; 4272 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s" 4273 : "=w"(result) 4274 : "0"(a), "w"(b), "w"(c) 4275 : /* No clobbers */); 4276 return result; 4277 } 4278 4279 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 4280 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) 4281 { 4282 int16x8_t result; 4283 
__asm__ ("sabal %0.8h,%2.8b,%3.8b" 4284 : "=w"(result) 4285 : "0"(a), "w"(b), "w"(c) 4286 : /* No clobbers */); 4287 return result; 4288 } 4289 4290 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 4291 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) 4292 { 4293 int32x4_t result; 4294 __asm__ ("sabal %0.4s,%2.4h,%3.4h" 4295 : "=w"(result) 4296 : "0"(a), "w"(b), "w"(c) 4297 : /* No clobbers */); 4298 return result; 4299 } 4300 4301 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 4302 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) 4303 { 4304 int64x2_t result; 4305 __asm__ ("sabal %0.2d,%2.2s,%3.2s" 4306 : "=w"(result) 4307 : "0"(a), "w"(b), "w"(c) 4308 : /* No clobbers */); 4309 return result; 4310 } 4311 4312 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 4313 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) 4314 { 4315 uint16x8_t result; 4316 __asm__ ("uabal %0.8h,%2.8b,%3.8b" 4317 : "=w"(result) 4318 : "0"(a), "w"(b), "w"(c) 4319 : /* No clobbers */); 4320 return result; 4321 } 4322 4323 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 4324 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) 4325 { 4326 uint32x4_t result; 4327 __asm__ ("uabal %0.4s,%2.4h,%3.4h" 4328 : "=w"(result) 4329 : "0"(a), "w"(b), "w"(c) 4330 : /* No clobbers */); 4331 return result; 4332 } 4333 4334 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 4335 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) 4336 { 4337 uint64x2_t result; 4338 __asm__ ("uabal %0.2d,%2.2s,%3.2s" 4339 : "=w"(result) 4340 : "0"(a), "w"(b), "w"(c) 4341 : /* No clobbers */); 4342 return result; 4343 } 4344 4345 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 4346 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) 4347 { 4348 int8x16_t result; 4349 __asm__ ("saba %0.16b,%2.16b,%3.16b" 4350 : "=w"(result) 4351 : "0"(a), "w"(b), "w"(c) 4352 : /* 
No clobbers */); 4353 return result; 4354 } 4355 4356 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 4357 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) 4358 { 4359 int16x8_t result; 4360 __asm__ ("saba %0.8h,%2.8h,%3.8h" 4361 : "=w"(result) 4362 : "0"(a), "w"(b), "w"(c) 4363 : /* No clobbers */); 4364 return result; 4365 } 4366 4367 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 4368 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) 4369 { 4370 int32x4_t result; 4371 __asm__ ("saba %0.4s,%2.4s,%3.4s" 4372 : "=w"(result) 4373 : "0"(a), "w"(b), "w"(c) 4374 : /* No clobbers */); 4375 return result; 4376 } 4377 4378 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 4379 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) 4380 { 4381 uint8x16_t result; 4382 __asm__ ("uaba %0.16b,%2.16b,%3.16b" 4383 : "=w"(result) 4384 : "0"(a), "w"(b), "w"(c) 4385 : /* No clobbers */); 4386 return result; 4387 } 4388 4389 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 4390 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) 4391 { 4392 uint16x8_t result; 4393 __asm__ ("uaba %0.8h,%2.8h,%3.8h" 4394 : "=w"(result) 4395 : "0"(a), "w"(b), "w"(c) 4396 : /* No clobbers */); 4397 return result; 4398 } 4399 4400 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 4401 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) 4402 { 4403 uint32x4_t result; 4404 __asm__ ("uaba %0.4s,%2.4s,%3.4s" 4405 : "=w"(result) 4406 : "0"(a), "w"(b), "w"(c) 4407 : /* No clobbers */); 4408 return result; 4409 } 4410 4411 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 4412 vabd_f32 (float32x2_t a, float32x2_t b) 4413 { 4414 float32x2_t result; 4415 __asm__ ("fabd %0.2s, %1.2s, %2.2s" 4416 : "=w"(result) 4417 : "w"(a), "w"(b) 4418 : /* No clobbers */); 4419 return result; 4420 } 4421 4422 __extension__ static __inline int8x8_t __attribute__ 
((__always_inline__)) 4423 vabd_s8 (int8x8_t a, int8x8_t b) 4424 { 4425 int8x8_t result; 4426 __asm__ ("sabd %0.8b, %1.8b, %2.8b" 4427 : "=w"(result) 4428 : "w"(a), "w"(b) 4429 : /* No clobbers */); 4430 return result; 4431 } 4432 4433 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 4434 vabd_s16 (int16x4_t a, int16x4_t b) 4435 { 4436 int16x4_t result; 4437 __asm__ ("sabd %0.4h, %1.4h, %2.4h" 4438 : "=w"(result) 4439 : "w"(a), "w"(b) 4440 : /* No clobbers */); 4441 return result; 4442 } 4443 4444 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 4445 vabd_s32 (int32x2_t a, int32x2_t b) 4446 { 4447 int32x2_t result; 4448 __asm__ ("sabd %0.2s, %1.2s, %2.2s" 4449 : "=w"(result) 4450 : "w"(a), "w"(b) 4451 : /* No clobbers */); 4452 return result; 4453 } 4454 4455 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 4456 vabd_u8 (uint8x8_t a, uint8x8_t b) 4457 { 4458 uint8x8_t result; 4459 __asm__ ("uabd %0.8b, %1.8b, %2.8b" 4460 : "=w"(result) 4461 : "w"(a), "w"(b) 4462 : /* No clobbers */); 4463 return result; 4464 } 4465 4466 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 4467 vabd_u16 (uint16x4_t a, uint16x4_t b) 4468 { 4469 uint16x4_t result; 4470 __asm__ ("uabd %0.4h, %1.4h, %2.4h" 4471 : "=w"(result) 4472 : "w"(a), "w"(b) 4473 : /* No clobbers */); 4474 return result; 4475 } 4476 4477 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 4478 vabd_u32 (uint32x2_t a, uint32x2_t b) 4479 { 4480 uint32x2_t result; 4481 __asm__ ("uabd %0.2s, %1.2s, %2.2s" 4482 : "=w"(result) 4483 : "w"(a), "w"(b) 4484 : /* No clobbers */); 4485 return result; 4486 } 4487 4488 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 4489 vabdd_f64 (float64_t a, float64_t b) 4490 { 4491 float64_t result; 4492 __asm__ ("fabd %d0, %d1, %d2" 4493 : "=w"(result) 4494 : "w"(a), "w"(b) 4495 : /* No clobbers */); 4496 return result; 4497 } 4498 
/* vabdl_high_<type>: emits SABDL2/UABDL2 — widening absolute difference
   of the high halves of two 128-bit vectors; each result element is
   twice the width of the source elements.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdl_high_s8 (int8x16_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdl_high_s16 (int16x8_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabdl_high_s32 (int32x4_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vabdl_<type>: emits SABDL/UABDL — widening absolute difference of two
   64-bit vectors into a 128-bit result.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdl_s8 (int8x8_t a, int8x8_t b)
{
  int16x8_t result;
  __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdl_s16 (int16x4_t a, int16x4_t b)
{
  int32x4_t result;
  __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabdl_s32 (int32x2_t a, int32x2_t b)
{
  int64x2_t result;
  __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdl_u8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t result;
  __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdl_u16 (uint16x4_t a, uint16x4_t b)
{
  uint32x4_t result;
  __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabdl_u32 (uint32x2_t a, uint32x2_t b)
{
  uint64x2_t result;
  __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vabdq_<type>: 128-bit forms of FABD/SABD/UABD (element-wise absolute
   difference, same element width in and out).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabdq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabdq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fabd %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabdq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sabd %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sabd %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vabdq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uabd %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uabd %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vabds_f32: scalar single-precision FABD (%s operand modifiers select
   the S (32-bit scalar) view of the FP/SIMD registers).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vabds_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("fabd %s0, %s1, %s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vaddlv_<type> / vaddlvq_<type>: emits SADDLV/UADDLV — widening
   add-across-lanes reduction.  The result is a single scalar of twice
   the element width; %h0/%s0/%d0 select the matching scalar register
   view for the destination.  */

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlv_s8 (int8x8_t a)
{
  int16_t result;
  __asm__ ("saddlv %h0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddlv_s16 (int16x4_t a)
{
  int32_t result;
  __asm__ ("saddlv %s0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddlv_u8 (uint8x8_t a)
{
  uint16_t result;
  __asm__ ("uaddlv %h0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddlv_u16 (uint16x4_t a)
{
  uint32_t result;
  __asm__ ("uaddlv %s0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlvq_s8 (int8x16_t a)
{
  int16_t result;
  __asm__ ("saddlv %h0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddlvq_s16 (int16x8_t a)
{
  int32_t result;
  __asm__ ("saddlv %s0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlvq_s32 (int32x4_t a)
{
  int64_t result;
  __asm__ ("saddlv %d0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddlvq_u8 (uint8x16_t a)
{
  uint16_t result;
  __asm__ ("uaddlv %h0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddlvq_u16 (uint16x8_t a)
{
  uint32_t result;
  __asm__ ("uaddlv %s0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddlvq_u32 (uint32x4_t a)
{
  uint64_t result;
  __asm__ ("uaddlv %d0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vcls_<type>: emits CLS — per-element count of leading sign bits
   (signed inputs only; there is no unsigned CLS).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcls_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("cls %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcls_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("cls %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcls_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("cls %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 128-bit CLS variants (continued after this group).  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclsq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("cls %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Remaining 128-bit CLS variants (count leading sign bits).  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclsq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("cls %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclsq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("cls %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vcnt_<type> / vcntq_<type>: emits CNT — per-byte population count.
   Only byte-element variants exist, so poly/signed/unsigned share the
   same instruction form.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcnt_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcnt_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcnt_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcntq_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcntq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcntq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vcopyq_lane_<type>(a, b, c, d): emits INS to copy lane d of vector c
   into lane b of vector a; both lane indices must be compile-time
   constants ("i" constraints).  These are macros (not inline functions)
   so that b and d reach the asm as immediates; the statement expression
   evaluates a and c exactly once into a_/c_.  */

#define vcopyq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_f64(a, b, c, d) \
  __extension__ \
    ({ \
       float64x2_t c_ = (c); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p8(a, b, c, d) \
  __extension__ \
    ({ \
       poly8x16_t c_ = (c); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p16(a, b, c, d) \
  __extension__ \
    ({ \
       poly16x8_t c_ = (c); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s8(a, b, c, d) \
  __extension__ \
    ({ \
       int8x16_t c_ = (c); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s64(a, b, c, d) \
  __extension__ \
    ({ \
       int64x2_t c_ = (c); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u8(a, b, c, d) \
  __extension__ \
    ({ \
       uint8x16_t c_ = (c); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u64(a, b, c, d) \
  __extension__ \
    ({ \
       uint64x2_t c_ = (c); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

/* Half-precision conversions are not provided by this header version.  */

/* vcvt_f16_f32 not supported */

/* vcvt_f32_f16 not supported */

/* vcvt_high_f16_f32 not supported */

/* vcvt_high_f32_f16 not supported */

/* Forward declaration: vdup_n_f32 is defined later in this file but is
   needed before that point.  */
static float32x2_t vdup_n_f32 (float32_t);

/* vcvt*_n_* (a, b): fixed-point <-> floating-point conversions with b
   fractional bits (SCVTF/UCVTF/FCVTZS/FCVTZU immediate forms).  Macros
   so that b reaches the asm as an immediate ("i" constraint).  */

#define vcvt_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("scvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

/* Scalar 64-bit fixed-point conversions (%d register views).  */

#define vcvtd_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64_t a_ = (a); \
       float64_t result; \
       __asm__ ("scvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64_t a_ = (a); \
       float64_t result; \
       __asm__ ("ucvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       int64_t result; \
       __asm__ ("fcvtzs %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       uint64_t result; \
       __asm__ ("fcvtzu %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

/* 128-bit vector fixed-point conversions.  */

#define vcvtq_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("scvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("scvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

/* Scalar 32-bit fixed-point conversions (%s register views).  */

#define vcvts_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32_t a_ = (a); \
       float32_t result; \
       __asm__ ("scvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32_t a_ = (a); \
       float32_t result; \
       __asm__ ("ucvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       int32_t result; \
       __asm__ ("fcvtzs %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       uint32_t result; \
       __asm__ ("fcvtzu %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

/* vcvtx*: emits FCVTXN/FCVTXN2 — double-to-single conversion.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvtx_f32_f64 (float64x2_t a)
{
  float32x2_t result;
  __asm__ ("fcvtxn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* FCVTXN2 writes only the upper half of the destination; "0"(a) ties
   the existing low half (a) to the output register so it is kept.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
{
  float32x4_t result;
  __asm__ ("fcvtxn2 %0.4s,%1.2d"
           : "=w"(result)
           : "w" (b), "0"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtxd_f32_f64 (float64_t a)
{
  float32_t result;
  __asm__ ("fcvtxn %s0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vext_<type>(a, b, c): emits EXT — extract a vector from a pair of
   vectors starting at element index c.  EXT counts in bytes, so the
   immediate is scaled by the element size in the asm template (#%3*2,
   #%3*4, #%3*8); the byte-element variants use the index directly.
   Macros so that c reaches the asm as an immediate.  */

#define vext_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64x1_t a_ = (a); \
       float64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64x1_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64x1_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* 128-bit EXT variants (same immediate scaling scheme).  */

#define vextq_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
5651 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ 5652 : "=w"(result) \ 5653 : "w"(a_), "w"(b_), "i"(c) \ 5654 : /* No clobbers */); \ 5655 result; \ 5656 }) 5657 5658 #define vextq_s64(a, b, c) \ 5659 __extension__ \ 5660 ({ \ 5661 int64x2_t b_ = (b); \ 5662 int64x2_t a_ = (a); \ 5663 int64x2_t result; \ 5664 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ 5665 : "=w"(result) \ 5666 : "w"(a_), "w"(b_), "i"(c) \ 5667 : /* No clobbers */); \ 5668 result; \ 5669 }) 5670 5671 #define vextq_u8(a, b, c) \ 5672 __extension__ \ 5673 ({ \ 5674 uint8x16_t b_ = (b); \ 5675 uint8x16_t a_ = (a); \ 5676 uint8x16_t result; \ 5677 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ 5678 : "=w"(result) \ 5679 : "w"(a_), "w"(b_), "i"(c) \ 5680 : /* No clobbers */); \ 5681 result; \ 5682 }) 5683 5684 #define vextq_u16(a, b, c) \ 5685 __extension__ \ 5686 ({ \ 5687 uint16x8_t b_ = (b); \ 5688 uint16x8_t a_ = (a); \ 5689 uint16x8_t result; \ 5690 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ 5691 : "=w"(result) \ 5692 : "w"(a_), "w"(b_), "i"(c) \ 5693 : /* No clobbers */); \ 5694 result; \ 5695 }) 5696 5697 #define vextq_u32(a, b, c) \ 5698 __extension__ \ 5699 ({ \ 5700 uint32x4_t b_ = (b); \ 5701 uint32x4_t a_ = (a); \ 5702 uint32x4_t result; \ 5703 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ 5704 : "=w"(result) \ 5705 : "w"(a_), "w"(b_), "i"(c) \ 5706 : /* No clobbers */); \ 5707 result; \ 5708 }) 5709 5710 #define vextq_u64(a, b, c) \ 5711 __extension__ \ 5712 ({ \ 5713 uint64x2_t b_ = (b); \ 5714 uint64x2_t a_ = (a); \ 5715 uint64x2_t result; \ 5716 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ 5717 : "=w"(result) \ 5718 : "w"(a_), "w"(b_), "i"(c) \ 5719 : /* No clobbers */); \ 5720 result; \ 5721 }) 5722 5723 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 5724 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c) 5725 { 5726 float32x2_t result; 5727 __asm__ ("fmla %0.2s,%2.2s,%3.2s" 5728 : "=w"(result) 5729 : "0"(a), "w"(b), "w"(c) 5730 : /* No 
clobbers */); 5731 return result; 5732 } 5733 5734 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 5735 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) 5736 { 5737 float32x4_t result; 5738 __asm__ ("fmla %0.4s,%2.4s,%3.4s" 5739 : "=w"(result) 5740 : "0"(a), "w"(b), "w"(c) 5741 : /* No clobbers */); 5742 return result; 5743 } 5744 5745 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 5746 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) 5747 { 5748 float64x2_t result; 5749 __asm__ ("fmla %0.2d,%2.2d,%3.2d" 5750 : "=w"(result) 5751 : "0"(a), "w"(b), "w"(c) 5752 : /* No clobbers */); 5753 return result; 5754 } 5755 5756 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 5757 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c) 5758 { 5759 float32x2_t result; 5760 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]" 5761 : "=w"(result) 5762 : "0"(a), "w"(b), "w"(c) 5763 : /* No clobbers */); 5764 return result; 5765 } 5766 5767 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 5768 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) 5769 { 5770 float32x4_t result; 5771 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]" 5772 : "=w"(result) 5773 : "0"(a), "w"(b), "w"(c) 5774 : /* No clobbers */); 5775 return result; 5776 } 5777 5778 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 5779 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) 5780 { 5781 float64x2_t result; 5782 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]" 5783 : "=w"(result) 5784 : "0"(a), "w"(b), "w"(c) 5785 : /* No clobbers */); 5786 return result; 5787 } 5788 5789 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 5790 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c) 5791 { 5792 float32x2_t result; 5793 __asm__ ("fmls %0.2s,%2.2s,%3.2s" 5794 : "=w"(result) 5795 : "0"(a), "w"(b), "w"(c) 5796 : /* No clobbers */); 5797 return result; 5798 } 5799 
/* Fused multiply-subtract (quad forms) and "get high half" intrinsics.
   Each wrapper emits exactly one A64 instruction via inline asm; the
   "0" input constraint ties the accumulator to the output register so
   the instruction updates it in place.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
{
  float32x4_t result;
  /* FMLS: result = a - b * c, fused (accumulator tied via "0"(a)).  */
  __asm__ ("fmls %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
{
  float64x2_t result;
  /* FMLS on 2 x double lanes; same in-place accumulator pattern.  */
  __asm__ ("fmls %0.2d,%2.2d,%3.2d"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vget_high_*: return the upper 64-bit half of a 128-bit vector.
   INS copies lane d[1] of the source into lane d[0] of the
   destination, i.e. the high doubleword becomes the 64-bit result.
   The 64x1 variants return scalar typedefs (e.g. float64x1_t is plain
   double) but still live in a "w" (FP/SIMD) register, so the same
   template works for all element types.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_high_f32 (float32x4_t a)
{
  float32x2_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vget_high_f64 (float64x2_t a)
{
  float64x1_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vget_high_p8 (poly8x16_t a)
{
  poly8x8_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vget_high_p16 (poly16x8_t a)
{
  poly16x4_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_high_s8 (int8x16_t a)
{
  int8x8_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vget_high_s16 (int16x8_t a)
{
  int16x4_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vget_high_s32 (int32x4_t a)
{
  int32x2_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_high_s64 (int64x2_t a)
{
  int64x1_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vget_high_u8 (uint8x16_t a)
{
  uint8x8_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vget_high_u16 (uint16x8_t a)
{
  uint16x4_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vget_high_u32 (uint32x4_t a)
{
  uint32x2_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vget_high_u64 (uint64x2_t a)
{
  uint64x1_t result;
  __asm__ ("ins %0.d[0], %1.d[1]"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline
int8x8_t __attribute__ ((__always_inline__))
vhsub_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  /* SHSUB: signed halving subtract — each lane is (a - b) >> 1
     computed without intermediate overflow.  */
  __asm__ ("shsub %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vhsub_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("shsub %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhsub_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("shsub %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhsub_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  /* UHSUB: unsigned variant of the halving subtract.  */
  __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhsub_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhsub_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* 128-bit (quad) forms of the halving subtract.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhsubq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("shsub %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhsubq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("shsub %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhsubq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("shsub %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhsubq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhsubq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhsubq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vld1_dup_*: load one element from *a and replicate it across all
   lanes of a 64-bit vector (LD1R).  "Utv" is the AArch64 memory
   constraint for a SIMD structure load address.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t * a)
{
  float32x2_t result;
  __asm__ ("ld1r {%0.2s}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_dup_f64 (const float64_t * a)
{
  float64x1_t result;
  __asm__
("ld1r {%0.1d}, %1" 6102 : "=w"(result) 6103 : "Utv"(*a) 6104 : /* No clobbers */); 6105 return result; 6106 } 6107 6108 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 6109 vld1_dup_p8 (const poly8_t * a) 6110 { 6111 poly8x8_t result; 6112 __asm__ ("ld1r {%0.8b}, %1" 6113 : "=w"(result) 6114 : "Utv"(*a) 6115 : /* No clobbers */); 6116 return result; 6117 } 6118 6119 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 6120 vld1_dup_p16 (const poly16_t * a) 6121 { 6122 poly16x4_t result; 6123 __asm__ ("ld1r {%0.4h}, %1" 6124 : "=w"(result) 6125 : "Utv"(*a) 6126 : /* No clobbers */); 6127 return result; 6128 } 6129 6130 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 6131 vld1_dup_s8 (const int8_t * a) 6132 { 6133 int8x8_t result; 6134 __asm__ ("ld1r {%0.8b}, %1" 6135 : "=w"(result) 6136 : "Utv"(*a) 6137 : /* No clobbers */); 6138 return result; 6139 } 6140 6141 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 6142 vld1_dup_s16 (const int16_t * a) 6143 { 6144 int16x4_t result; 6145 __asm__ ("ld1r {%0.4h}, %1" 6146 : "=w"(result) 6147 : "Utv"(*a) 6148 : /* No clobbers */); 6149 return result; 6150 } 6151 6152 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 6153 vld1_dup_s32 (const int32_t * a) 6154 { 6155 int32x2_t result; 6156 __asm__ ("ld1r {%0.2s}, %1" 6157 : "=w"(result) 6158 : "Utv"(*a) 6159 : /* No clobbers */); 6160 return result; 6161 } 6162 6163 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 6164 vld1_dup_s64 (const int64_t * a) 6165 { 6166 int64x1_t result; 6167 __asm__ ("ld1r {%0.1d}, %1" 6168 : "=w"(result) 6169 : "Utv"(*a) 6170 : /* No clobbers */); 6171 return result; 6172 } 6173 6174 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 6175 vld1_dup_u8 (const uint8_t * a) 6176 { 6177 uint8x8_t result; 6178 __asm__ ("ld1r {%0.8b}, %1" 6179 : "=w"(result) 6180 : "Utv"(*a) 
6181 : /* No clobbers */); 6182 return result; 6183 } 6184 6185 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 6186 vld1_dup_u16 (const uint16_t * a) 6187 { 6188 uint16x4_t result; 6189 __asm__ ("ld1r {%0.4h}, %1" 6190 : "=w"(result) 6191 : "Utv"(*a) 6192 : /* No clobbers */); 6193 return result; 6194 } 6195 6196 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 6197 vld1_dup_u32 (const uint32_t * a) 6198 { 6199 uint32x2_t result; 6200 __asm__ ("ld1r {%0.2s}, %1" 6201 : "=w"(result) 6202 : "Utv"(*a) 6203 : /* No clobbers */); 6204 return result; 6205 } 6206 6207 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 6208 vld1_dup_u64 (const uint64_t * a) 6209 { 6210 uint64x1_t result; 6211 __asm__ ("ld1r {%0.1d}, %1" 6212 : "=w"(result) 6213 : "Utv"(*a) 6214 : /* No clobbers */); 6215 return result; 6216 } 6217 6218 #define vld1_lane_f32(a, b, c) \ 6219 __extension__ \ 6220 ({ \ 6221 float32x2_t b_ = (b); \ 6222 const float32_t * a_ = (a); \ 6223 float32x2_t result; \ 6224 __asm__ ("ld1 {%0.s}[%1], %2" \ 6225 : "=w"(result) \ 6226 : "i" (c), "Utv"(*a_), "0"(b_) \ 6227 : /* No clobbers */); \ 6228 result; \ 6229 }) 6230 6231 #define vld1_lane_f64(a, b, c) \ 6232 __extension__ \ 6233 ({ \ 6234 float64x1_t b_ = (b); \ 6235 const float64_t * a_ = (a); \ 6236 float64x1_t result; \ 6237 __asm__ ("ld1 {%0.d}[%1], %2" \ 6238 : "=w"(result) \ 6239 : "i" (c), "Utv"(*a_), "0"(b_) \ 6240 : /* No clobbers */); \ 6241 result; \ 6242 }) 6243 6244 #define vld1_lane_p8(a, b, c) \ 6245 __extension__ \ 6246 ({ \ 6247 poly8x8_t b_ = (b); \ 6248 const poly8_t * a_ = (a); \ 6249 poly8x8_t result; \ 6250 __asm__ ("ld1 {%0.b}[%1], %2" \ 6251 : "=w"(result) \ 6252 : "i" (c), "Utv"(*a_), "0"(b_) \ 6253 : /* No clobbers */); \ 6254 result; \ 6255 }) 6256 6257 #define vld1_lane_p16(a, b, c) \ 6258 __extension__ \ 6259 ({ \ 6260 poly16x4_t b_ = (b); \ 6261 const poly16_t * a_ = (a); \ 6262 poly16x4_t result; \ 6263 
__asm__ ("ld1 {%0.h}[%1], %2" \ 6264 : "=w"(result) \ 6265 : "i" (c), "Utv"(*a_), "0"(b_) \ 6266 : /* No clobbers */); \ 6267 result; \ 6268 }) 6269 6270 #define vld1_lane_s8(a, b, c) \ 6271 __extension__ \ 6272 ({ \ 6273 int8x8_t b_ = (b); \ 6274 const int8_t * a_ = (a); \ 6275 int8x8_t result; \ 6276 __asm__ ("ld1 {%0.b}[%1], %2" \ 6277 : "=w"(result) \ 6278 : "i" (c), "Utv"(*a_), "0"(b_) \ 6279 : /* No clobbers */); \ 6280 result; \ 6281 }) 6282 6283 #define vld1_lane_s16(a, b, c) \ 6284 __extension__ \ 6285 ({ \ 6286 int16x4_t b_ = (b); \ 6287 const int16_t * a_ = (a); \ 6288 int16x4_t result; \ 6289 __asm__ ("ld1 {%0.h}[%1], %2" \ 6290 : "=w"(result) \ 6291 : "i" (c), "Utv"(*a_), "0"(b_) \ 6292 : /* No clobbers */); \ 6293 result; \ 6294 }) 6295 6296 #define vld1_lane_s32(a, b, c) \ 6297 __extension__ \ 6298 ({ \ 6299 int32x2_t b_ = (b); \ 6300 const int32_t * a_ = (a); \ 6301 int32x2_t result; \ 6302 __asm__ ("ld1 {%0.s}[%1], %2" \ 6303 : "=w"(result) \ 6304 : "i" (c), "Utv"(*a_), "0"(b_) \ 6305 : /* No clobbers */); \ 6306 result; \ 6307 }) 6308 6309 #define vld1_lane_s64(a, b, c) \ 6310 __extension__ \ 6311 ({ \ 6312 int64x1_t b_ = (b); \ 6313 const int64_t * a_ = (a); \ 6314 int64x1_t result; \ 6315 __asm__ ("ld1 {%0.d}[%1], %2" \ 6316 : "=w"(result) \ 6317 : "i" (c), "Utv"(*a_), "0"(b_) \ 6318 : /* No clobbers */); \ 6319 result; \ 6320 }) 6321 6322 #define vld1_lane_u8(a, b, c) \ 6323 __extension__ \ 6324 ({ \ 6325 uint8x8_t b_ = (b); \ 6326 const uint8_t * a_ = (a); \ 6327 uint8x8_t result; \ 6328 __asm__ ("ld1 {%0.b}[%1], %2" \ 6329 : "=w"(result) \ 6330 : "i" (c), "Utv"(*a_), "0"(b_) \ 6331 : /* No clobbers */); \ 6332 result; \ 6333 }) 6334 6335 #define vld1_lane_u16(a, b, c) \ 6336 __extension__ \ 6337 ({ \ 6338 uint16x4_t b_ = (b); \ 6339 const uint16_t * a_ = (a); \ 6340 uint16x4_t result; \ 6341 __asm__ ("ld1 {%0.h}[%1], %2" \ 6342 : "=w"(result) \ 6343 : "i" (c), "Utv"(*a_), "0"(b_) \ 6344 : /* No clobbers */); \ 6345 result; \ 6346 }) 6347 6348 
/* vld1_lane_u32/u64: load one element from *a into lane C of vector B,
   leaving the other lanes of B untouched ("0"(b_) ties B to the
   output register before the LD1 lane load).  Implemented as macros
   because the lane number must be a literal immediate ("i"(c)).  */

#define vld1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x2_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x1_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vld1q_dup_*: load one element from *a and replicate it across all
   lanes of a 128-bit vector (LD1R with a quad arrangement).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * a)
{
  float32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_dup_f64 (const float64_t * a)
{
  float64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_dup_p8 (const poly8_t * a)
{
  poly8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_dup_p16 (const poly16_t * a)
{
  poly16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_dup_s8 (const int8_t * a)
{
  int8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_dup_s16 (const int16_t * a)
{
  int16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_dup_s32 (const int32_t * a)
{
  int32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_dup_s64 (const int64_t * a)
{
  int64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_dup_u8 (const uint8_t * a)
{
  uint8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_dup_u16 (const uint16_t * a)
{
  uint16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_dup_u32 (const uint32_t * a)
{
  uint32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_dup_u64 (const uint64_t * a)
{
  uint64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

6506 #define vld1q_lane_f32(a, b, c) \ 6507 __extension__ \ 6508 ({ \ 6509 float32x4_t b_ = (b); \ 6510 const float32_t * a_ = (a); \ 6511 float32x4_t result; \ 6512 __asm__ ("ld1 {%0.s}[%1], %2" \ 6513 : "=w"(result) \ 6514 : "i"(c), "Utv"(*a_), "0"(b_) \ 6515 : /* No clobbers */); \ 6516 result; \ 6517 }) 6518 6519 #define vld1q_lane_f64(a, b, c) \ 6520 __extension__ \ 6521 ({ \ 6522 float64x2_t b_ = (b); \ 6523 const float64_t * a_ = (a); \ 6524 float64x2_t result; \ 6525 __asm__ ("ld1 {%0.d}[%1], %2" \ 6526 : "=w"(result) \ 6527 : "i"(c), "Utv"(*a_), "0"(b_) \ 6528 : /* No clobbers */); \ 6529 result; \ 6530 }) 6531 6532 #define vld1q_lane_p8(a, b, c) \ 6533 __extension__ \ 6534 ({ \ 6535 poly8x16_t b_ = (b); \ 6536 const poly8_t * a_ = (a); \ 6537 poly8x16_t result; \ 6538 __asm__ ("ld1 {%0.b}[%1], %2" \ 6539 : "=w"(result) \ 6540 : "i"(c), "Utv"(*a_), "0"(b_) \ 6541 : /* No clobbers */); \ 6542 result; \ 6543 }) 6544 6545 #define vld1q_lane_p16(a, b, c) \ 6546 __extension__ \ 6547 ({ \ 6548 poly16x8_t b_ = (b); \ 6549 const poly16_t * a_ = (a); \ 6550 poly16x8_t result; \ 6551 __asm__ ("ld1 {%0.h}[%1], %2" \ 6552 : "=w"(result) \ 6553 : "i"(c), "Utv"(*a_), "0"(b_) \ 6554 : /* No clobbers */); \ 6555 result; \ 6556 }) 6557 6558 #define vld1q_lane_s8(a, b, c) \ 6559 __extension__ \ 6560 ({ \ 6561 int8x16_t b_ = (b); \ 6562 const int8_t * a_ = (a); \ 6563 int8x16_t result; \ 6564 __asm__ ("ld1 {%0.b}[%1], %2" \ 6565 : "=w"(result) \ 6566 : "i"(c), "Utv"(*a_), "0"(b_) \ 6567 : /* No clobbers */); \ 6568 result; \ 6569 }) 6570 6571 #define vld1q_lane_s16(a, b, c) \ 6572 __extension__ \ 6573 ({ \ 6574 int16x8_t b_ = (b); \ 6575 const int16_t * a_ = (a); \ 6576 int16x8_t result; \ 6577 __asm__ ("ld1 {%0.h}[%1], %2" \ 6578 : "=w"(result) \ 6579 : "i"(c), "Utv"(*a_), "0"(b_) \ 6580 : /* No clobbers */); \ 6581 result; \ 6582 }) 6583 6584 #define vld1q_lane_s32(a, b, c) \ 6585 __extension__ \ 6586 ({ \ 6587 int32x4_t b_ = (b); \ 6588 const int32_t * a_ = (a); \ 6589 
int32x4_t result; \ 6590 __asm__ ("ld1 {%0.s}[%1], %2" \ 6591 : "=w"(result) \ 6592 : "i"(c), "Utv"(*a_), "0"(b_) \ 6593 : /* No clobbers */); \ 6594 result; \ 6595 }) 6596 6597 #define vld1q_lane_s64(a, b, c) \ 6598 __extension__ \ 6599 ({ \ 6600 int64x2_t b_ = (b); \ 6601 const int64_t * a_ = (a); \ 6602 int64x2_t result; \ 6603 __asm__ ("ld1 {%0.d}[%1], %2" \ 6604 : "=w"(result) \ 6605 : "i"(c), "Utv"(*a_), "0"(b_) \ 6606 : /* No clobbers */); \ 6607 result; \ 6608 }) 6609 6610 #define vld1q_lane_u8(a, b, c) \ 6611 __extension__ \ 6612 ({ \ 6613 uint8x16_t b_ = (b); \ 6614 const uint8_t * a_ = (a); \ 6615 uint8x16_t result; \ 6616 __asm__ ("ld1 {%0.b}[%1], %2" \ 6617 : "=w"(result) \ 6618 : "i"(c), "Utv"(*a_), "0"(b_) \ 6619 : /* No clobbers */); \ 6620 result; \ 6621 }) 6622 6623 #define vld1q_lane_u16(a, b, c) \ 6624 __extension__ \ 6625 ({ \ 6626 uint16x8_t b_ = (b); \ 6627 const uint16_t * a_ = (a); \ 6628 uint16x8_t result; \ 6629 __asm__ ("ld1 {%0.h}[%1], %2" \ 6630 : "=w"(result) \ 6631 : "i"(c), "Utv"(*a_), "0"(b_) \ 6632 : /* No clobbers */); \ 6633 result; \ 6634 }) 6635 6636 #define vld1q_lane_u32(a, b, c) \ 6637 __extension__ \ 6638 ({ \ 6639 uint32x4_t b_ = (b); \ 6640 const uint32_t * a_ = (a); \ 6641 uint32x4_t result; \ 6642 __asm__ ("ld1 {%0.s}[%1], %2" \ 6643 : "=w"(result) \ 6644 : "i"(c), "Utv"(*a_), "0"(b_) \ 6645 : /* No clobbers */); \ 6646 result; \ 6647 }) 6648 6649 #define vld1q_lane_u64(a, b, c) \ 6650 __extension__ \ 6651 ({ \ 6652 uint64x2_t b_ = (b); \ 6653 const uint64_t * a_ = (a); \ 6654 uint64x2_t result; \ 6655 __asm__ ("ld1 {%0.d}[%1], %2" \ 6656 : "=w"(result) \ 6657 : "i"(c), "Utv"(*a_), "0"(b_) \ 6658 : /* No clobbers */); \ 6659 result; \ 6660 }) 6661 6662 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 6663 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) 6664 { 6665 float32x2_t result; 6666 float32x2_t t1; 6667 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" 6668 : 
           "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vmla_n_*: multiply-accumulate by a scalar broadcast from lane 0.
   Note the constraint split: the 16-bit indexed MLA forms can only
   address v0-v15 for the indexed operand, hence "x" (lower half of
   the SIMD register file) instead of "w" for the _s16/_u16 scalars.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
{
  uint16x4_t result;
  __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* vmla_*: lane-wise multiply-accumulate, a + b * c, with the
   accumulator tied in place via "0"(a).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

6740 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 6741 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) 6742 { 6743 int32x2_t result; 6744 __asm__ ("mla %0.2s, %2.2s, %3.2s" 6745 : "=w"(result) 6746 : "0"(a), "w"(b), "w"(c) 6747 : /* No clobbers */); 6748 return result; 6749 } 6750 6751 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 6752 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) 6753 { 6754 uint8x8_t result; 6755 __asm__ ("mla %0.8b, %2.8b, %3.8b" 6756 : "=w"(result) 6757 : "0"(a), "w"(b), "w"(c) 6758 : /* No clobbers */); 6759 return result; 6760 } 6761 6762 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 6763 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) 6764 { 6765 uint16x4_t result; 6766 __asm__ ("mla %0.4h, %2.4h, %3.4h" 6767 : "=w"(result) 6768 : "0"(a), "w"(b), "w"(c) 6769 : /* No clobbers */); 6770 return result; 6771 } 6772 6773 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 6774 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) 6775 { 6776 uint32x2_t result; 6777 __asm__ ("mla %0.2s, %2.2s, %3.2s" 6778 : "=w"(result) 6779 : "0"(a), "w"(b), "w"(c) 6780 : /* No clobbers */); 6781 return result; 6782 } 6783 6784 #define vmlal_high_lane_s16(a, b, c, d) \ 6785 __extension__ \ 6786 ({ \ 6787 int16x8_t c_ = (c); \ 6788 int16x8_t b_ = (b); \ 6789 int32x4_t a_ = (a); \ 6790 int32x4_t result; \ 6791 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ 6792 : "=w"(result) \ 6793 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 6794 : /* No clobbers */); \ 6795 result; \ 6796 }) 6797 6798 #define vmlal_high_lane_s32(a, b, c, d) \ 6799 __extension__ \ 6800 ({ \ 6801 int32x4_t c_ = (c); \ 6802 int32x4_t b_ = (b); \ 6803 int64x2_t a_ = (a); \ 6804 int64x2_t result; \ 6805 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ 6806 : "=w"(result) \ 6807 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 6808 : /* No clobbers */); \ 6809 result; \ 6810 }) 6811 6812 #define 
vmlal_high_lane_u16(a, b, c, d) \ 6813 __extension__ \ 6814 ({ \ 6815 uint16x8_t c_ = (c); \ 6816 uint16x8_t b_ = (b); \ 6817 uint32x4_t a_ = (a); \ 6818 uint32x4_t result; \ 6819 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ 6820 : "=w"(result) \ 6821 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 6822 : /* No clobbers */); \ 6823 result; \ 6824 }) 6825 6826 #define vmlal_high_lane_u32(a, b, c, d) \ 6827 __extension__ \ 6828 ({ \ 6829 uint32x4_t c_ = (c); \ 6830 uint32x4_t b_ = (b); \ 6831 uint64x2_t a_ = (a); \ 6832 uint64x2_t result; \ 6833 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ 6834 : "=w"(result) \ 6835 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 6836 : /* No clobbers */); \ 6837 result; \ 6838 }) 6839 6840 #define vmlal_high_laneq_s16(a, b, c, d) \ 6841 __extension__ \ 6842 ({ \ 6843 int16x8_t c_ = (c); \ 6844 int16x8_t b_ = (b); \ 6845 int32x4_t a_ = (a); \ 6846 int32x4_t result; \ 6847 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ 6848 : "=w"(result) \ 6849 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 6850 : /* No clobbers */); \ 6851 result; \ 6852 }) 6853 6854 #define vmlal_high_laneq_s32(a, b, c, d) \ 6855 __extension__ \ 6856 ({ \ 6857 int32x4_t c_ = (c); \ 6858 int32x4_t b_ = (b); \ 6859 int64x2_t a_ = (a); \ 6860 int64x2_t result; \ 6861 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ 6862 : "=w"(result) \ 6863 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 6864 : /* No clobbers */); \ 6865 result; \ 6866 }) 6867 6868 #define vmlal_high_laneq_u16(a, b, c, d) \ 6869 __extension__ \ 6870 ({ \ 6871 uint16x8_t c_ = (c); \ 6872 uint16x8_t b_ = (b); \ 6873 uint32x4_t a_ = (a); \ 6874 uint32x4_t result; \ 6875 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ 6876 : "=w"(result) \ 6877 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 6878 : /* No clobbers */); \ 6879 result; \ 6880 }) 6881 6882 #define vmlal_high_laneq_u32(a, b, c, d) \ 6883 __extension__ \ 6884 ({ \ 6885 uint32x4_t c_ = (c); \ 6886 uint32x4_t b_ = (b); \ 6887 uint64x2_t a_ = (a); \ 6888 uint64x2_t result; \ 6889 __asm__ ("umlal2 %0.2d, %2.4s, 
%3.s[%4]" \ 6890 : "=w"(result) \ 6891 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 6892 : /* No clobbers */); \ 6893 result; \ 6894 }) 6895 6896 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 6897 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) 6898 { 6899 int32x4_t result; 6900 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" 6901 : "=w"(result) 6902 : "0"(a), "w"(b), "x"(c) 6903 : /* No clobbers */); 6904 return result; 6905 } 6906 6907 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 6908 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) 6909 { 6910 int64x2_t result; 6911 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" 6912 : "=w"(result) 6913 : "0"(a), "w"(b), "w"(c) 6914 : /* No clobbers */); 6915 return result; 6916 } 6917 6918 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 6919 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) 6920 { 6921 uint32x4_t result; 6922 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" 6923 : "=w"(result) 6924 : "0"(a), "w"(b), "x"(c) 6925 : /* No clobbers */); 6926 return result; 6927 } 6928 6929 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 6930 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) 6931 { 6932 uint64x2_t result; 6933 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" 6934 : "=w"(result) 6935 : "0"(a), "w"(b), "w"(c) 6936 : /* No clobbers */); 6937 return result; 6938 } 6939 6940 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 6941 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) 6942 { 6943 int16x8_t result; 6944 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" 6945 : "=w"(result) 6946 : "0"(a), "w"(b), "w"(c) 6947 : /* No clobbers */); 6948 return result; 6949 } 6950 6951 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 6952 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) 6953 { 6954 int32x4_t result; 6955 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" 6956 : "=w"(result) 6957 : 
"0"(a), "w"(b), "w"(c) 6958 : /* No clobbers */); 6959 return result; 6960 } 6961 6962 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 6963 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) 6964 { 6965 int64x2_t result; 6966 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" 6967 : "=w"(result) 6968 : "0"(a), "w"(b), "w"(c) 6969 : /* No clobbers */); 6970 return result; 6971 } 6972 6973 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 6974 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) 6975 { 6976 uint16x8_t result; 6977 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" 6978 : "=w"(result) 6979 : "0"(a), "w"(b), "w"(c) 6980 : /* No clobbers */); 6981 return result; 6982 } 6983 6984 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 6985 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) 6986 { 6987 uint32x4_t result; 6988 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" 6989 : "=w"(result) 6990 : "0"(a), "w"(b), "w"(c) 6991 : /* No clobbers */); 6992 return result; 6993 } 6994 6995 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 6996 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) 6997 { 6998 uint64x2_t result; 6999 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" 7000 : "=w"(result) 7001 : "0"(a), "w"(b), "w"(c) 7002 : /* No clobbers */); 7003 return result; 7004 } 7005 7006 #define vmlal_lane_s16(a, b, c, d) \ 7007 __extension__ \ 7008 ({ \ 7009 int16x4_t c_ = (c); \ 7010 int16x4_t b_ = (b); \ 7011 int32x4_t a_ = (a); \ 7012 int32x4_t result; \ 7013 __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ 7014 : "=w"(result) \ 7015 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7016 : /* No clobbers */); \ 7017 result; \ 7018 }) 7019 7020 #define vmlal_lane_s32(a, b, c, d) \ 7021 __extension__ \ 7022 ({ \ 7023 int32x2_t c_ = (c); \ 7024 int32x2_t b_ = (b); \ 7025 int64x2_t a_ = (a); \ 7026 int64x2_t result; \ 7027 __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \ 7028 : "=w"(result) \ 7029 : "0"(a_), "w"(b_), 
"w"(c_), "i"(d) \ 7030 : /* No clobbers */); \ 7031 result; \ 7032 }) 7033 7034 #define vmlal_lane_u16(a, b, c, d) \ 7035 __extension__ \ 7036 ({ \ 7037 uint16x4_t c_ = (c); \ 7038 uint16x4_t b_ = (b); \ 7039 uint32x4_t a_ = (a); \ 7040 uint32x4_t result; \ 7041 __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ 7042 : "=w"(result) \ 7043 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7044 : /* No clobbers */); \ 7045 result; \ 7046 }) 7047 7048 #define vmlal_lane_u32(a, b, c, d) \ 7049 __extension__ \ 7050 ({ \ 7051 uint32x2_t c_ = (c); \ 7052 uint32x2_t b_ = (b); \ 7053 uint64x2_t a_ = (a); \ 7054 uint64x2_t result; \ 7055 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ 7056 : "=w"(result) \ 7057 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7058 : /* No clobbers */); \ 7059 result; \ 7060 }) 7061 7062 #define vmlal_laneq_s16(a, b, c, d) \ 7063 __extension__ \ 7064 ({ \ 7065 int16x8_t c_ = (c); \ 7066 int16x4_t b_ = (b); \ 7067 int32x4_t a_ = (a); \ 7068 int32x4_t result; \ 7069 __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ 7070 : "=w"(result) \ 7071 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7072 : /* No clobbers */); \ 7073 result; \ 7074 }) 7075 7076 #define vmlal_laneq_s32(a, b, c, d) \ 7077 __extension__ \ 7078 ({ \ 7079 int32x4_t c_ = (c); \ 7080 int32x2_t b_ = (b); \ 7081 int64x2_t a_ = (a); \ 7082 int64x2_t result; \ 7083 __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \ 7084 : "=w"(result) \ 7085 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7086 : /* No clobbers */); \ 7087 result; \ 7088 }) 7089 7090 #define vmlal_laneq_u16(a, b, c, d) \ 7091 __extension__ \ 7092 ({ \ 7093 uint16x8_t c_ = (c); \ 7094 uint16x4_t b_ = (b); \ 7095 uint32x4_t a_ = (a); \ 7096 uint32x4_t result; \ 7097 __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ 7098 : "=w"(result) \ 7099 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7100 : /* No clobbers */); \ 7101 result; \ 7102 }) 7103 7104 #define vmlal_laneq_u32(a, b, c, d) \ 7105 __extension__ \ 7106 ({ \ 7107 uint32x4_t c_ = (c); \ 7108 uint32x2_t b_ = (b); \ 7109 uint64x2_t a_ = (a); \ 7110 
uint64x2_t result; \ 7111 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ 7112 : "=w"(result) \ 7113 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7114 : /* No clobbers */); \ 7115 result; \ 7116 }) 7117 7118 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 7119 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) 7120 { 7121 int32x4_t result; 7122 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" 7123 : "=w"(result) 7124 : "0"(a), "w"(b), "x"(c) 7125 : /* No clobbers */); 7126 return result; 7127 } 7128 7129 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 7130 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) 7131 { 7132 int64x2_t result; 7133 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" 7134 : "=w"(result) 7135 : "0"(a), "w"(b), "w"(c) 7136 : /* No clobbers */); 7137 return result; 7138 } 7139 7140 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 7141 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) 7142 { 7143 uint32x4_t result; 7144 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" 7145 : "=w"(result) 7146 : "0"(a), "w"(b), "x"(c) 7147 : /* No clobbers */); 7148 return result; 7149 } 7150 7151 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 7152 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) 7153 { 7154 uint64x2_t result; 7155 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" 7156 : "=w"(result) 7157 : "0"(a), "w"(b), "w"(c) 7158 : /* No clobbers */); 7159 return result; 7160 } 7161 7162 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 7163 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) 7164 { 7165 int16x8_t result; 7166 __asm__ ("smlal %0.8h,%2.8b,%3.8b" 7167 : "=w"(result) 7168 : "0"(a), "w"(b), "w"(c) 7169 : /* No clobbers */); 7170 return result; 7171 } 7172 7173 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 7174 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) 7175 { 7176 int32x4_t result; 7177 __asm__ ("smlal %0.4s,%2.4h,%3.4h" 7178 : 
"=w"(result) 7179 : "0"(a), "w"(b), "w"(c) 7180 : /* No clobbers */); 7181 return result; 7182 } 7183 7184 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 7185 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) 7186 { 7187 int64x2_t result; 7188 __asm__ ("smlal %0.2d,%2.2s,%3.2s" 7189 : "=w"(result) 7190 : "0"(a), "w"(b), "w"(c) 7191 : /* No clobbers */); 7192 return result; 7193 } 7194 7195 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 7196 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) 7197 { 7198 uint16x8_t result; 7199 __asm__ ("umlal %0.8h,%2.8b,%3.8b" 7200 : "=w"(result) 7201 : "0"(a), "w"(b), "w"(c) 7202 : /* No clobbers */); 7203 return result; 7204 } 7205 7206 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 7207 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) 7208 { 7209 uint32x4_t result; 7210 __asm__ ("umlal %0.4s,%2.4h,%3.4h" 7211 : "=w"(result) 7212 : "0"(a), "w"(b), "w"(c) 7213 : /* No clobbers */); 7214 return result; 7215 } 7216 7217 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 7218 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) 7219 { 7220 uint64x2_t result; 7221 __asm__ ("umlal %0.2d,%2.2s,%3.2s" 7222 : "=w"(result) 7223 : "0"(a), "w"(b), "w"(c) 7224 : /* No clobbers */); 7225 return result; 7226 } 7227 7228 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 7229 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) 7230 { 7231 float32x4_t result; 7232 float32x4_t t1; 7233 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" 7234 : "=w"(result), "=w"(t1) 7235 : "0"(a), "w"(b), "w"(c) 7236 : /* No clobbers */); 7237 return result; 7238 } 7239 7240 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 7241 vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) 7242 { 7243 float64x2_t result; 7244 float64x2_t t1; 7245 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; 
fadd %0.2d, %0.2d, %1.2d" 7246 : "=w"(result), "=w"(t1) 7247 : "0"(a), "w"(b), "w"(c) 7248 : /* No clobbers */); 7249 return result; 7250 } 7251 7252 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 7253 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) 7254 { 7255 int16x8_t result; 7256 __asm__ ("mla %0.8h,%2.8h,%3.h[0]" 7257 : "=w"(result) 7258 : "0"(a), "w"(b), "x"(c) 7259 : /* No clobbers */); 7260 return result; 7261 } 7262 7263 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 7264 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) 7265 { 7266 int32x4_t result; 7267 __asm__ ("mla %0.4s,%2.4s,%3.s[0]" 7268 : "=w"(result) 7269 : "0"(a), "w"(b), "w"(c) 7270 : /* No clobbers */); 7271 return result; 7272 } 7273 7274 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 7275 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) 7276 { 7277 uint16x8_t result; 7278 __asm__ ("mla %0.8h,%2.8h,%3.h[0]" 7279 : "=w"(result) 7280 : "0"(a), "w"(b), "x"(c) 7281 : /* No clobbers */); 7282 return result; 7283 } 7284 7285 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 7286 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) 7287 { 7288 uint32x4_t result; 7289 __asm__ ("mla %0.4s,%2.4s,%3.s[0]" 7290 : "=w"(result) 7291 : "0"(a), "w"(b), "w"(c) 7292 : /* No clobbers */); 7293 return result; 7294 } 7295 7296 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 7297 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) 7298 { 7299 int8x16_t result; 7300 __asm__ ("mla %0.16b, %2.16b, %3.16b" 7301 : "=w"(result) 7302 : "0"(a), "w"(b), "w"(c) 7303 : /* No clobbers */); 7304 return result; 7305 } 7306 7307 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 7308 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) 7309 { 7310 int16x8_t result; 7311 __asm__ ("mla %0.8h, %2.8h, %3.8h" 7312 : "=w"(result) 7313 : "0"(a), "w"(b), "w"(c) 7314 : /* 
No clobbers */); 7315 return result; 7316 } 7317 7318 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 7319 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) 7320 { 7321 int32x4_t result; 7322 __asm__ ("mla %0.4s, %2.4s, %3.4s" 7323 : "=w"(result) 7324 : "0"(a), "w"(b), "w"(c) 7325 : /* No clobbers */); 7326 return result; 7327 } 7328 7329 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 7330 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) 7331 { 7332 uint8x16_t result; 7333 __asm__ ("mla %0.16b, %2.16b, %3.16b" 7334 : "=w"(result) 7335 : "0"(a), "w"(b), "w"(c) 7336 : /* No clobbers */); 7337 return result; 7338 } 7339 7340 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 7341 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) 7342 { 7343 uint16x8_t result; 7344 __asm__ ("mla %0.8h, %2.8h, %3.8h" 7345 : "=w"(result) 7346 : "0"(a), "w"(b), "w"(c) 7347 : /* No clobbers */); 7348 return result; 7349 } 7350 7351 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 7352 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) 7353 { 7354 uint32x4_t result; 7355 __asm__ ("mla %0.4s, %2.4s, %3.4s" 7356 : "=w"(result) 7357 : "0"(a), "w"(b), "w"(c) 7358 : /* No clobbers */); 7359 return result; 7360 } 7361 7362 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 7363 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) 7364 { 7365 float32x2_t result; 7366 float32x2_t t1; 7367 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" 7368 : "=w"(result), "=w"(t1) 7369 : "0"(a), "w"(b), "w"(c) 7370 : /* No clobbers */); 7371 return result; 7372 } 7373 7374 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 7375 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) 7376 { 7377 int16x4_t result; 7378 __asm__ ("mls %0.4h, %2.4h, %3.h[0]" 7379 : "=w"(result) 7380 : "0"(a), "w"(b), "x"(c) 7381 : /* No clobbers */); 7382 
return result; 7383 } 7384 7385 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 7386 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) 7387 { 7388 int32x2_t result; 7389 __asm__ ("mls %0.2s, %2.2s, %3.s[0]" 7390 : "=w"(result) 7391 : "0"(a), "w"(b), "w"(c) 7392 : /* No clobbers */); 7393 return result; 7394 } 7395 7396 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 7397 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) 7398 { 7399 uint16x4_t result; 7400 __asm__ ("mls %0.4h, %2.4h, %3.h[0]" 7401 : "=w"(result) 7402 : "0"(a), "w"(b), "x"(c) 7403 : /* No clobbers */); 7404 return result; 7405 } 7406 7407 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 7408 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) 7409 { 7410 uint32x2_t result; 7411 __asm__ ("mls %0.2s, %2.2s, %3.s[0]" 7412 : "=w"(result) 7413 : "0"(a), "w"(b), "w"(c) 7414 : /* No clobbers */); 7415 return result; 7416 } 7417 7418 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 7419 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c) 7420 { 7421 int8x8_t result; 7422 __asm__ ("mls %0.8b,%2.8b,%3.8b" 7423 : "=w"(result) 7424 : "0"(a), "w"(b), "w"(c) 7425 : /* No clobbers */); 7426 return result; 7427 } 7428 7429 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 7430 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c) 7431 { 7432 int16x4_t result; 7433 __asm__ ("mls %0.4h,%2.4h,%3.4h" 7434 : "=w"(result) 7435 : "0"(a), "w"(b), "w"(c) 7436 : /* No clobbers */); 7437 return result; 7438 } 7439 7440 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 7441 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) 7442 { 7443 int32x2_t result; 7444 __asm__ ("mls %0.2s,%2.2s,%3.2s" 7445 : "=w"(result) 7446 : "0"(a), "w"(b), "w"(c) 7447 : /* No clobbers */); 7448 return result; 7449 } 7450 7451 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 
7452 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) 7453 { 7454 uint8x8_t result; 7455 __asm__ ("mls %0.8b,%2.8b,%3.8b" 7456 : "=w"(result) 7457 : "0"(a), "w"(b), "w"(c) 7458 : /* No clobbers */); 7459 return result; 7460 } 7461 7462 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 7463 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) 7464 { 7465 uint16x4_t result; 7466 __asm__ ("mls %0.4h,%2.4h,%3.4h" 7467 : "=w"(result) 7468 : "0"(a), "w"(b), "w"(c) 7469 : /* No clobbers */); 7470 return result; 7471 } 7472 7473 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 7474 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) 7475 { 7476 uint32x2_t result; 7477 __asm__ ("mls %0.2s,%2.2s,%3.2s" 7478 : "=w"(result) 7479 : "0"(a), "w"(b), "w"(c) 7480 : /* No clobbers */); 7481 return result; 7482 } 7483 7484 #define vmlsl_high_lane_s16(a, b, c, d) \ 7485 __extension__ \ 7486 ({ \ 7487 int16x8_t c_ = (c); \ 7488 int16x8_t b_ = (b); \ 7489 int32x4_t a_ = (a); \ 7490 int32x4_t result; \ 7491 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ 7492 : "=w"(result) \ 7493 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7494 : /* No clobbers */); \ 7495 result; \ 7496 }) 7497 7498 #define vmlsl_high_lane_s32(a, b, c, d) \ 7499 __extension__ \ 7500 ({ \ 7501 int32x4_t c_ = (c); \ 7502 int32x4_t b_ = (b); \ 7503 int64x2_t a_ = (a); \ 7504 int64x2_t result; \ 7505 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ 7506 : "=w"(result) \ 7507 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7508 : /* No clobbers */); \ 7509 result; \ 7510 }) 7511 7512 #define vmlsl_high_lane_u16(a, b, c, d) \ 7513 __extension__ \ 7514 ({ \ 7515 uint16x8_t c_ = (c); \ 7516 uint16x8_t b_ = (b); \ 7517 uint32x4_t a_ = (a); \ 7518 uint32x4_t result; \ 7519 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ 7520 : "=w"(result) \ 7521 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7522 : /* No clobbers */); \ 7523 result; \ 7524 }) 7525 7526 #define vmlsl_high_lane_u32(a, b, c, d) \ 7527 __extension__ \ 
7528 ({ \ 7529 uint32x4_t c_ = (c); \ 7530 uint32x4_t b_ = (b); \ 7531 uint64x2_t a_ = (a); \ 7532 uint64x2_t result; \ 7533 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ 7534 : "=w"(result) \ 7535 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7536 : /* No clobbers */); \ 7537 result; \ 7538 }) 7539 7540 #define vmlsl_high_laneq_s16(a, b, c, d) \ 7541 __extension__ \ 7542 ({ \ 7543 int16x8_t c_ = (c); \ 7544 int16x8_t b_ = (b); \ 7545 int32x4_t a_ = (a); \ 7546 int32x4_t result; \ 7547 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ 7548 : "=w"(result) \ 7549 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7550 : /* No clobbers */); \ 7551 result; \ 7552 }) 7553 7554 #define vmlsl_high_laneq_s32(a, b, c, d) \ 7555 __extension__ \ 7556 ({ \ 7557 int32x4_t c_ = (c); \ 7558 int32x4_t b_ = (b); \ 7559 int64x2_t a_ = (a); \ 7560 int64x2_t result; \ 7561 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ 7562 : "=w"(result) \ 7563 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7564 : /* No clobbers */); \ 7565 result; \ 7566 }) 7567 7568 #define vmlsl_high_laneq_u16(a, b, c, d) \ 7569 __extension__ \ 7570 ({ \ 7571 uint16x8_t c_ = (c); \ 7572 uint16x8_t b_ = (b); \ 7573 uint32x4_t a_ = (a); \ 7574 uint32x4_t result; \ 7575 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ 7576 : "=w"(result) \ 7577 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7578 : /* No clobbers */); \ 7579 result; \ 7580 }) 7581 7582 #define vmlsl_high_laneq_u32(a, b, c, d) \ 7583 __extension__ \ 7584 ({ \ 7585 uint32x4_t c_ = (c); \ 7586 uint32x4_t b_ = (b); \ 7587 uint64x2_t a_ = (a); \ 7588 uint64x2_t result; \ 7589 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ 7590 : "=w"(result) \ 7591 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7592 : /* No clobbers */); \ 7593 result; \ 7594 }) 7595 7596 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 7597 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) 7598 { 7599 int32x4_t result; 7600 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" 7601 : "=w"(result) 7602 : "0"(a), "w"(b), "x"(c) 7603 : /* No 
clobbers */); 7604 return result; 7605 } 7606 7607 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 7608 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) 7609 { 7610 int64x2_t result; 7611 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" 7612 : "=w"(result) 7613 : "0"(a), "w"(b), "w"(c) 7614 : /* No clobbers */); 7615 return result; 7616 } 7617 7618 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 7619 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) 7620 { 7621 uint32x4_t result; 7622 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" 7623 : "=w"(result) 7624 : "0"(a), "w"(b), "x"(c) 7625 : /* No clobbers */); 7626 return result; 7627 } 7628 7629 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 7630 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) 7631 { 7632 uint64x2_t result; 7633 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" 7634 : "=w"(result) 7635 : "0"(a), "w"(b), "w"(c) 7636 : /* No clobbers */); 7637 return result; 7638 } 7639 7640 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 7641 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) 7642 { 7643 int16x8_t result; 7644 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" 7645 : "=w"(result) 7646 : "0"(a), "w"(b), "w"(c) 7647 : /* No clobbers */); 7648 return result; 7649 } 7650 7651 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 7652 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) 7653 { 7654 int32x4_t result; 7655 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" 7656 : "=w"(result) 7657 : "0"(a), "w"(b), "w"(c) 7658 : /* No clobbers */); 7659 return result; 7660 } 7661 7662 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 7663 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) 7664 { 7665 int64x2_t result; 7666 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" 7667 : "=w"(result) 7668 : "0"(a), "w"(b), "w"(c) 7669 : /* No clobbers */); 7670 return result; 7671 } 7672 7673 
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 7674 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) 7675 { 7676 uint16x8_t result; 7677 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" 7678 : "=w"(result) 7679 : "0"(a), "w"(b), "w"(c) 7680 : /* No clobbers */); 7681 return result; 7682 } 7683 7684 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 7685 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) 7686 { 7687 uint32x4_t result; 7688 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" 7689 : "=w"(result) 7690 : "0"(a), "w"(b), "w"(c) 7691 : /* No clobbers */); 7692 return result; 7693 } 7694 7695 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 7696 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) 7697 { 7698 uint64x2_t result; 7699 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" 7700 : "=w"(result) 7701 : "0"(a), "w"(b), "w"(c) 7702 : /* No clobbers */); 7703 return result; 7704 } 7705 7706 #define vmlsl_lane_s16(a, b, c, d) \ 7707 __extension__ \ 7708 ({ \ 7709 int16x4_t c_ = (c); \ 7710 int16x4_t b_ = (b); \ 7711 int32x4_t a_ = (a); \ 7712 int32x4_t result; \ 7713 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ 7714 : "=w"(result) \ 7715 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7716 : /* No clobbers */); \ 7717 result; \ 7718 }) 7719 7720 #define vmlsl_lane_s32(a, b, c, d) \ 7721 __extension__ \ 7722 ({ \ 7723 int32x2_t c_ = (c); \ 7724 int32x2_t b_ = (b); \ 7725 int64x2_t a_ = (a); \ 7726 int64x2_t result; \ 7727 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ 7728 : "=w"(result) \ 7729 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7730 : /* No clobbers */); \ 7731 result; \ 7732 }) 7733 7734 #define vmlsl_lane_u16(a, b, c, d) \ 7735 __extension__ \ 7736 ({ \ 7737 uint16x4_t c_ = (c); \ 7738 uint16x4_t b_ = (b); \ 7739 uint32x4_t a_ = (a); \ 7740 uint32x4_t result; \ 7741 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ 7742 : "=w"(result) \ 7743 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7744 : /* No clobbers */); \ 7745 
result; \ 7746 }) 7747 7748 #define vmlsl_lane_u32(a, b, c, d) \ 7749 __extension__ \ 7750 ({ \ 7751 uint32x2_t c_ = (c); \ 7752 uint32x2_t b_ = (b); \ 7753 uint64x2_t a_ = (a); \ 7754 uint64x2_t result; \ 7755 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ 7756 : "=w"(result) \ 7757 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7758 : /* No clobbers */); \ 7759 result; \ 7760 }) 7761 7762 #define vmlsl_laneq_s16(a, b, c, d) \ 7763 __extension__ \ 7764 ({ \ 7765 int16x8_t c_ = (c); \ 7766 int16x4_t b_ = (b); \ 7767 int32x4_t a_ = (a); \ 7768 int32x4_t result; \ 7769 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ 7770 : "=w"(result) \ 7771 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7772 : /* No clobbers */); \ 7773 result; \ 7774 }) 7775 7776 #define vmlsl_laneq_s32(a, b, c, d) \ 7777 __extension__ \ 7778 ({ \ 7779 int32x4_t c_ = (c); \ 7780 int32x2_t b_ = (b); \ 7781 int64x2_t a_ = (a); \ 7782 int64x2_t result; \ 7783 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ 7784 : "=w"(result) \ 7785 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7786 : /* No clobbers */); \ 7787 result; \ 7788 }) 7789 7790 #define vmlsl_laneq_u16(a, b, c, d) \ 7791 __extension__ \ 7792 ({ \ 7793 uint16x8_t c_ = (c); \ 7794 uint16x4_t b_ = (b); \ 7795 uint32x4_t a_ = (a); \ 7796 uint32x4_t result; \ 7797 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ 7798 : "=w"(result) \ 7799 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 7800 : /* No clobbers */); \ 7801 result; \ 7802 }) 7803 7804 #define vmlsl_laneq_u32(a, b, c, d) \ 7805 __extension__ \ 7806 ({ \ 7807 uint32x4_t c_ = (c); \ 7808 uint32x2_t b_ = (b); \ 7809 uint64x2_t a_ = (a); \ 7810 uint64x2_t result; \ 7811 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ 7812 : "=w"(result) \ 7813 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ 7814 : /* No clobbers */); \ 7815 result; \ 7816 }) 7817 7818 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 7819 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) 7820 { 7821 int32x4_t result; 7822 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" 7823 : 
"=w"(result) 7824 : "0"(a), "w"(b), "x"(c) 7825 : /* No clobbers */); 7826 return result; 7827 } 7828 7829 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 7830 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) 7831 { 7832 int64x2_t result; 7833 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]" 7834 : "=w"(result) 7835 : "0"(a), "w"(b), "w"(c) 7836 : /* No clobbers */); 7837 return result; 7838 } 7839 7840 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 7841 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) 7842 { 7843 uint32x4_t result; 7844 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" 7845 : "=w"(result) 7846 : "0"(a), "w"(b), "x"(c) 7847 : /* No clobbers */); 7848 return result; 7849 } 7850 7851 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 7852 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) 7853 { 7854 uint64x2_t result; 7855 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" 7856 : "=w"(result) 7857 : "0"(a), "w"(b), "w"(c) 7858 : /* No clobbers */); 7859 return result; 7860 } 7861 7862 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 7863 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) 7864 { 7865 int16x8_t result; 7866 __asm__ ("smlsl %0.8h, %2.8b, %3.8b" 7867 : "=w"(result) 7868 : "0"(a), "w"(b), "w"(c) 7869 : /* No clobbers */); 7870 return result; 7871 } 7872 7873 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 7874 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c) 7875 { 7876 int32x4_t result; 7877 __asm__ ("smlsl %0.4s, %2.4h, %3.4h" 7878 : "=w"(result) 7879 : "0"(a), "w"(b), "w"(c) 7880 : /* No clobbers */); 7881 return result; 7882 } 7883 7884 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 7885 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) 7886 { 7887 int64x2_t result; 7888 __asm__ ("smlsl %0.2d, %2.2s, %3.2s" 7889 : "=w"(result) 7890 : "0"(a), "w"(b), "w"(c) 7891 : /* No clobbers */); 7892 return 
result; 7893 } 7894 7895 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 7896 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) 7897 { 7898 uint16x8_t result; 7899 __asm__ ("umlsl %0.8h, %2.8b, %3.8b" 7900 : "=w"(result) 7901 : "0"(a), "w"(b), "w"(c) 7902 : /* No clobbers */); 7903 return result; 7904 } 7905 7906 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 7907 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) 7908 { 7909 uint32x4_t result; 7910 __asm__ ("umlsl %0.4s, %2.4h, %3.4h" 7911 : "=w"(result) 7912 : "0"(a), "w"(b), "w"(c) 7913 : /* No clobbers */); 7914 return result; 7915 } 7916 7917 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 7918 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) 7919 { 7920 uint64x2_t result; 7921 __asm__ ("umlsl %0.2d, %2.2s, %3.2s" 7922 : "=w"(result) 7923 : "0"(a), "w"(b), "w"(c) 7924 : /* No clobbers */); 7925 return result; 7926 } 7927 7928 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 7929 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) 7930 { 7931 float32x4_t result; 7932 float32x4_t t1; 7933 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" 7934 : "=w"(result), "=w"(t1) 7935 : "0"(a), "w"(b), "w"(c) 7936 : /* No clobbers */); 7937 return result; 7938 } 7939 7940 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 7941 vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) 7942 { 7943 float64x2_t result; 7944 float64x2_t t1; 7945 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d" 7946 : "=w"(result), "=w"(t1) 7947 : "0"(a), "w"(b), "x"(c) 7948 : /* No clobbers */); 7949 return result; 7950 } 7951 7952 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 7953 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) 7954 { 7955 int16x8_t result; 7956 __asm__ ("mls %0.8h, %2.8h, %3.h[0]" 7957 : "=w"(result) 7958 : "0"(a), 
"w"(b), "x"(c) 7959 : /* No clobbers */); 7960 return result; 7961 } 7962 7963 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 7964 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) 7965 { 7966 int32x4_t result; 7967 __asm__ ("mls %0.4s, %2.4s, %3.s[0]" 7968 : "=w"(result) 7969 : "0"(a), "w"(b), "w"(c) 7970 : /* No clobbers */); 7971 return result; 7972 } 7973 7974 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 7975 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) 7976 { 7977 uint16x8_t result; 7978 __asm__ ("mls %0.8h, %2.8h, %3.h[0]" 7979 : "=w"(result) 7980 : "0"(a), "w"(b), "x"(c) 7981 : /* No clobbers */); 7982 return result; 7983 } 7984 7985 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 7986 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) 7987 { 7988 uint32x4_t result; 7989 __asm__ ("mls %0.4s, %2.4s, %3.s[0]" 7990 : "=w"(result) 7991 : "0"(a), "w"(b), "w"(c) 7992 : /* No clobbers */); 7993 return result; 7994 } 7995 7996 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 7997 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) 7998 { 7999 int8x16_t result; 8000 __asm__ ("mls %0.16b,%2.16b,%3.16b" 8001 : "=w"(result) 8002 : "0"(a), "w"(b), "w"(c) 8003 : /* No clobbers */); 8004 return result; 8005 } 8006 8007 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 8008 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) 8009 { 8010 int16x8_t result; 8011 __asm__ ("mls %0.8h,%2.8h,%3.8h" 8012 : "=w"(result) 8013 : "0"(a), "w"(b), "w"(c) 8014 : /* No clobbers */); 8015 return result; 8016 } 8017 8018 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 8019 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) 8020 { 8021 int32x4_t result; 8022 __asm__ ("mls %0.4s,%2.4s,%3.4s" 8023 : "=w"(result) 8024 : "0"(a), "w"(b), "w"(c) 8025 : /* No clobbers */); 8026 return result; 8027 } 8028 8029 __extension__ 
static __inline uint8x16_t __attribute__ ((__always_inline__)) 8030 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) 8031 { 8032 uint8x16_t result; 8033 __asm__ ("mls %0.16b,%2.16b,%3.16b" 8034 : "=w"(result) 8035 : "0"(a), "w"(b), "w"(c) 8036 : /* No clobbers */); 8037 return result; 8038 } 8039 8040 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 8041 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) 8042 { 8043 uint16x8_t result; 8044 __asm__ ("mls %0.8h,%2.8h,%3.8h" 8045 : "=w"(result) 8046 : "0"(a), "w"(b), "w"(c) 8047 : /* No clobbers */); 8048 return result; 8049 } 8050 8051 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 8052 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) 8053 { 8054 uint32x4_t result; 8055 __asm__ ("mls %0.4s,%2.4s,%3.4s" 8056 : "=w"(result) 8057 : "0"(a), "w"(b), "w"(c) 8058 : /* No clobbers */); 8059 return result; 8060 } 8061 8062 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 8063 vmovl_high_s8 (int8x16_t a) 8064 { 8065 int16x8_t result; 8066 __asm__ ("sshll2 %0.8h,%1.16b,#0" 8067 : "=w"(result) 8068 : "w"(a) 8069 : /* No clobbers */); 8070 return result; 8071 } 8072 8073 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 8074 vmovl_high_s16 (int16x8_t a) 8075 { 8076 int32x4_t result; 8077 __asm__ ("sshll2 %0.4s,%1.8h,#0" 8078 : "=w"(result) 8079 : "w"(a) 8080 : /* No clobbers */); 8081 return result; 8082 } 8083 8084 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 8085 vmovl_high_s32 (int32x4_t a) 8086 { 8087 int64x2_t result; 8088 __asm__ ("sshll2 %0.2d,%1.4s,#0" 8089 : "=w"(result) 8090 : "w"(a) 8091 : /* No clobbers */); 8092 return result; 8093 } 8094 8095 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 8096 vmovl_high_u8 (uint8x16_t a) 8097 { 8098 uint16x8_t result; 8099 __asm__ ("ushll2 %0.8h,%1.16b,#0" 8100 : "=w"(result) 8101 : "w"(a) 8102 : /* No 
clobbers */); 8103 return result; 8104 } 8105 8106 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 8107 vmovl_high_u16 (uint16x8_t a) 8108 { 8109 uint32x4_t result; 8110 __asm__ ("ushll2 %0.4s,%1.8h,#0" 8111 : "=w"(result) 8112 : "w"(a) 8113 : /* No clobbers */); 8114 return result; 8115 } 8116 8117 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 8118 vmovl_high_u32 (uint32x4_t a) 8119 { 8120 uint64x2_t result; 8121 __asm__ ("ushll2 %0.2d,%1.4s,#0" 8122 : "=w"(result) 8123 : "w"(a) 8124 : /* No clobbers */); 8125 return result; 8126 } 8127 8128 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 8129 vmovl_s8 (int8x8_t a) 8130 { 8131 int16x8_t result; 8132 __asm__ ("sshll %0.8h,%1.8b,#0" 8133 : "=w"(result) 8134 : "w"(a) 8135 : /* No clobbers */); 8136 return result; 8137 } 8138 8139 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 8140 vmovl_s16 (int16x4_t a) 8141 { 8142 int32x4_t result; 8143 __asm__ ("sshll %0.4s,%1.4h,#0" 8144 : "=w"(result) 8145 : "w"(a) 8146 : /* No clobbers */); 8147 return result; 8148 } 8149 8150 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 8151 vmovl_s32 (int32x2_t a) 8152 { 8153 int64x2_t result; 8154 __asm__ ("sshll %0.2d,%1.2s,#0" 8155 : "=w"(result) 8156 : "w"(a) 8157 : /* No clobbers */); 8158 return result; 8159 } 8160 8161 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 8162 vmovl_u8 (uint8x8_t a) 8163 { 8164 uint16x8_t result; 8165 __asm__ ("ushll %0.8h,%1.8b,#0" 8166 : "=w"(result) 8167 : "w"(a) 8168 : /* No clobbers */); 8169 return result; 8170 } 8171 8172 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 8173 vmovl_u16 (uint16x4_t a) 8174 { 8175 uint32x4_t result; 8176 __asm__ ("ushll %0.4s,%1.4h,#0" 8177 : "=w"(result) 8178 : "w"(a) 8179 : /* No clobbers */); 8180 return result; 8181 } 8182 8183 __extension__ static __inline 
uint64x2_t __attribute__ ((__always_inline__)) 8184 vmovl_u32 (uint32x2_t a) 8185 { 8186 uint64x2_t result; 8187 __asm__ ("ushll %0.2d,%1.2s,#0" 8188 : "=w"(result) 8189 : "w"(a) 8190 : /* No clobbers */); 8191 return result; 8192 } 8193 8194 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 8195 vmovn_high_s16 (int8x8_t a, int16x8_t b) 8196 { 8197 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); 8198 __asm__ ("xtn2 %0.16b,%1.8h" 8199 : "+w"(result) 8200 : "w"(b) 8201 : /* No clobbers */); 8202 return result; 8203 } 8204 8205 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 8206 vmovn_high_s32 (int16x4_t a, int32x4_t b) 8207 { 8208 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); 8209 __asm__ ("xtn2 %0.8h,%1.4s" 8210 : "+w"(result) 8211 : "w"(b) 8212 : /* No clobbers */); 8213 return result; 8214 } 8215 8216 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 8217 vmovn_high_s64 (int32x2_t a, int64x2_t b) 8218 { 8219 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); 8220 __asm__ ("xtn2 %0.4s,%1.2d" 8221 : "+w"(result) 8222 : "w"(b) 8223 : /* No clobbers */); 8224 return result; 8225 } 8226 8227 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 8228 vmovn_high_u16 (uint8x8_t a, uint16x8_t b) 8229 { 8230 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); 8231 __asm__ ("xtn2 %0.16b,%1.8h" 8232 : "+w"(result) 8233 : "w"(b) 8234 : /* No clobbers */); 8235 return result; 8236 } 8237 8238 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 8239 vmovn_high_u32 (uint16x4_t a, uint32x4_t b) 8240 { 8241 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); 8242 __asm__ ("xtn2 %0.8h,%1.4s" 8243 : "+w"(result) 8244 : "w"(b) 8245 : /* No clobbers */); 8246 return result; 8247 } 8248 8249 __extension__ static __inline 
uint32x4_t __attribute__ ((__always_inline__)) 8250 vmovn_high_u64 (uint32x2_t a, uint64x2_t b) 8251 { 8252 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); 8253 __asm__ ("xtn2 %0.4s,%1.2d" 8254 : "+w"(result) 8255 : "w"(b) 8256 : /* No clobbers */); 8257 return result; 8258 } 8259 8260 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 8261 vmovn_s16 (int16x8_t a) 8262 { 8263 int8x8_t result; 8264 __asm__ ("xtn %0.8b,%1.8h" 8265 : "=w"(result) 8266 : "w"(a) 8267 : /* No clobbers */); 8268 return result; 8269 } 8270 8271 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 8272 vmovn_s32 (int32x4_t a) 8273 { 8274 int16x4_t result; 8275 __asm__ ("xtn %0.4h,%1.4s" 8276 : "=w"(result) 8277 : "w"(a) 8278 : /* No clobbers */); 8279 return result; 8280 } 8281 8282 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 8283 vmovn_s64 (int64x2_t a) 8284 { 8285 int32x2_t result; 8286 __asm__ ("xtn %0.2s,%1.2d" 8287 : "=w"(result) 8288 : "w"(a) 8289 : /* No clobbers */); 8290 return result; 8291 } 8292 8293 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 8294 vmovn_u16 (uint16x8_t a) 8295 { 8296 uint8x8_t result; 8297 __asm__ ("xtn %0.8b,%1.8h" 8298 : "=w"(result) 8299 : "w"(a) 8300 : /* No clobbers */); 8301 return result; 8302 } 8303 8304 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 8305 vmovn_u32 (uint32x4_t a) 8306 { 8307 uint16x4_t result; 8308 __asm__ ("xtn %0.4h,%1.4s" 8309 : "=w"(result) 8310 : "w"(a) 8311 : /* No clobbers */); 8312 return result; 8313 } 8314 8315 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 8316 vmovn_u64 (uint64x2_t a) 8317 { 8318 uint32x2_t result; 8319 __asm__ ("xtn %0.2s,%1.2d" 8320 : "=w"(result) 8321 : "w"(a) 8322 : /* No clobbers */); 8323 return result; 8324 } 8325 8326 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 8327 
vmul_n_f32 (float32x2_t a, float32_t b) 8328 { 8329 float32x2_t result; 8330 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]" 8331 : "=w"(result) 8332 : "w"(a), "w"(b) 8333 : /* No clobbers */); 8334 return result; 8335 } 8336 8337 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 8338 vmul_n_s16 (int16x4_t a, int16_t b) 8339 { 8340 int16x4_t result; 8341 __asm__ ("mul %0.4h,%1.4h,%2.h[0]" 8342 : "=w"(result) 8343 : "w"(a), "x"(b) 8344 : /* No clobbers */); 8345 return result; 8346 } 8347 8348 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 8349 vmul_n_s32 (int32x2_t a, int32_t b) 8350 { 8351 int32x2_t result; 8352 __asm__ ("mul %0.2s,%1.2s,%2.s[0]" 8353 : "=w"(result) 8354 : "w"(a), "w"(b) 8355 : /* No clobbers */); 8356 return result; 8357 } 8358 8359 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 8360 vmul_n_u16 (uint16x4_t a, uint16_t b) 8361 { 8362 uint16x4_t result; 8363 __asm__ ("mul %0.4h,%1.4h,%2.h[0]" 8364 : "=w"(result) 8365 : "w"(a), "x"(b) 8366 : /* No clobbers */); 8367 return result; 8368 } 8369 8370 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 8371 vmul_n_u32 (uint32x2_t a, uint32_t b) 8372 { 8373 uint32x2_t result; 8374 __asm__ ("mul %0.2s,%1.2s,%2.s[0]" 8375 : "=w"(result) 8376 : "w"(a), "w"(b) 8377 : /* No clobbers */); 8378 return result; 8379 } 8380 8381 #define vmuld_lane_f64(a, b, c) \ 8382 __extension__ \ 8383 ({ \ 8384 float64x2_t b_ = (b); \ 8385 float64_t a_ = (a); \ 8386 float64_t result; \ 8387 __asm__ ("fmul %d0,%d1,%2.d[%3]" \ 8388 : "=w"(result) \ 8389 : "w"(a_), "w"(b_), "i"(c) \ 8390 : /* No clobbers */); \ 8391 result; \ 8392 }) 8393 8394 #define vmull_high_lane_s16(a, b, c) \ 8395 __extension__ \ 8396 ({ \ 8397 int16x4_t b_ = (b); \ 8398 int16x8_t a_ = (a); \ 8399 int32x4_t result; \ 8400 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ 8401 : "=w"(result) \ 8402 : "w"(a_), "x"(b_), "i"(c) \ 8403 : /* No clobbers */); \ 8404 result; \ 
8405 }) 8406 8407 #define vmull_high_lane_s32(a, b, c) \ 8408 __extension__ \ 8409 ({ \ 8410 int32x2_t b_ = (b); \ 8411 int32x4_t a_ = (a); \ 8412 int64x2_t result; \ 8413 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ 8414 : "=w"(result) \ 8415 : "w"(a_), "w"(b_), "i"(c) \ 8416 : /* No clobbers */); \ 8417 result; \ 8418 }) 8419 8420 #define vmull_high_lane_u16(a, b, c) \ 8421 __extension__ \ 8422 ({ \ 8423 uint16x4_t b_ = (b); \ 8424 uint16x8_t a_ = (a); \ 8425 uint32x4_t result; \ 8426 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ 8427 : "=w"(result) \ 8428 : "w"(a_), "x"(b_), "i"(c) \ 8429 : /* No clobbers */); \ 8430 result; \ 8431 }) 8432 8433 #define vmull_high_lane_u32(a, b, c) \ 8434 __extension__ \ 8435 ({ \ 8436 uint32x2_t b_ = (b); \ 8437 uint32x4_t a_ = (a); \ 8438 uint64x2_t result; \ 8439 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ 8440 : "=w"(result) \ 8441 : "w"(a_), "w"(b_), "i"(c) \ 8442 : /* No clobbers */); \ 8443 result; \ 8444 }) 8445 8446 #define vmull_high_laneq_s16(a, b, c) \ 8447 __extension__ \ 8448 ({ \ 8449 int16x8_t b_ = (b); \ 8450 int16x8_t a_ = (a); \ 8451 int32x4_t result; \ 8452 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ 8453 : "=w"(result) \ 8454 : "w"(a_), "x"(b_), "i"(c) \ 8455 : /* No clobbers */); \ 8456 result; \ 8457 }) 8458 8459 #define vmull_high_laneq_s32(a, b, c) \ 8460 __extension__ \ 8461 ({ \ 8462 int32x4_t b_ = (b); \ 8463 int32x4_t a_ = (a); \ 8464 int64x2_t result; \ 8465 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ 8466 : "=w"(result) \ 8467 : "w"(a_), "w"(b_), "i"(c) \ 8468 : /* No clobbers */); \ 8469 result; \ 8470 }) 8471 8472 #define vmull_high_laneq_u16(a, b, c) \ 8473 __extension__ \ 8474 ({ \ 8475 uint16x8_t b_ = (b); \ 8476 uint16x8_t a_ = (a); \ 8477 uint32x4_t result; \ 8478 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ 8479 : "=w"(result) \ 8480 : "w"(a_), "x"(b_), "i"(c) \ 8481 : /* No clobbers */); \ 8482 result; \ 8483 }) 8484 8485 #define vmull_high_laneq_u32(a, b, c) \ 8486 __extension__ \ 8487 ({ \ 8488 
uint32x4_t b_ = (b); \ 8489 uint32x4_t a_ = (a); \ 8490 uint64x2_t result; \ 8491 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ 8492 : "=w"(result) \ 8493 : "w"(a_), "w"(b_), "i"(c) \ 8494 : /* No clobbers */); \ 8495 result; \ 8496 }) 8497 8498 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 8499 vmull_high_n_s16 (int16x8_t a, int16_t b) 8500 { 8501 int32x4_t result; 8502 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" 8503 : "=w"(result) 8504 : "w"(a), "x"(b) 8505 : /* No clobbers */); 8506 return result; 8507 } 8508 8509 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 8510 vmull_high_n_s32 (int32x4_t a, int32_t b) 8511 { 8512 int64x2_t result; 8513 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" 8514 : "=w"(result) 8515 : "w"(a), "w"(b) 8516 : /* No clobbers */); 8517 return result; 8518 } 8519 8520 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 8521 vmull_high_n_u16 (uint16x8_t a, uint16_t b) 8522 { 8523 uint32x4_t result; 8524 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" 8525 : "=w"(result) 8526 : "w"(a), "x"(b) 8527 : /* No clobbers */); 8528 return result; 8529 } 8530 8531 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 8532 vmull_high_n_u32 (uint32x4_t a, uint32_t b) 8533 { 8534 uint64x2_t result; 8535 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" 8536 : "=w"(result) 8537 : "w"(a), "w"(b) 8538 : /* No clobbers */); 8539 return result; 8540 } 8541 8542 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 8543 vmull_high_p8 (poly8x16_t a, poly8x16_t b) 8544 { 8545 poly16x8_t result; 8546 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" 8547 : "=w"(result) 8548 : "w"(a), "w"(b) 8549 : /* No clobbers */); 8550 return result; 8551 } 8552 8553 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 8554 vmull_high_s8 (int8x16_t a, int8x16_t b) 8555 { 8556 int16x8_t result; 8557 __asm__ ("smull2 %0.8h,%1.16b,%2.16b" 8558 : "=w"(result) 8559 : "w"(a), 
"w"(b) 8560 : /* No clobbers */); 8561 return result; 8562 } 8563 8564 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 8565 vmull_high_s16 (int16x8_t a, int16x8_t b) 8566 { 8567 int32x4_t result; 8568 __asm__ ("smull2 %0.4s,%1.8h,%2.8h" 8569 : "=w"(result) 8570 : "w"(a), "w"(b) 8571 : /* No clobbers */); 8572 return result; 8573 } 8574 8575 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 8576 vmull_high_s32 (int32x4_t a, int32x4_t b) 8577 { 8578 int64x2_t result; 8579 __asm__ ("smull2 %0.2d,%1.4s,%2.4s" 8580 : "=w"(result) 8581 : "w"(a), "w"(b) 8582 : /* No clobbers */); 8583 return result; 8584 } 8585 8586 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 8587 vmull_high_u8 (uint8x16_t a, uint8x16_t b) 8588 { 8589 uint16x8_t result; 8590 __asm__ ("umull2 %0.8h,%1.16b,%2.16b" 8591 : "=w"(result) 8592 : "w"(a), "w"(b) 8593 : /* No clobbers */); 8594 return result; 8595 } 8596 8597 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 8598 vmull_high_u16 (uint16x8_t a, uint16x8_t b) 8599 { 8600 uint32x4_t result; 8601 __asm__ ("umull2 %0.4s,%1.8h,%2.8h" 8602 : "=w"(result) 8603 : "w"(a), "w"(b) 8604 : /* No clobbers */); 8605 return result; 8606 } 8607 8608 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 8609 vmull_high_u32 (uint32x4_t a, uint32x4_t b) 8610 { 8611 uint64x2_t result; 8612 __asm__ ("umull2 %0.2d,%1.4s,%2.4s" 8613 : "=w"(result) 8614 : "w"(a), "w"(b) 8615 : /* No clobbers */); 8616 return result; 8617 } 8618 8619 #define vmull_lane_s16(a, b, c) \ 8620 __extension__ \ 8621 ({ \ 8622 int16x4_t b_ = (b); \ 8623 int16x4_t a_ = (a); \ 8624 int32x4_t result; \ 8625 __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ 8626 : "=w"(result) \ 8627 : "w"(a_), "x"(b_), "i"(c) \ 8628 : /* No clobbers */); \ 8629 result; \ 8630 }) 8631 8632 #define vmull_lane_s32(a, b, c) \ 8633 __extension__ \ 8634 ({ \ 8635 int32x2_t b_ = (b); \ 8636 int32x2_t 
a_ = (a); \ 8637 int64x2_t result; \ 8638 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \ 8639 : "=w"(result) \ 8640 : "w"(a_), "w"(b_), "i"(c) \ 8641 : /* No clobbers */); \ 8642 result; \ 8643 }) 8644 8645 #define vmull_lane_u16(a, b, c) \ 8646 __extension__ \ 8647 ({ \ 8648 uint16x4_t b_ = (b); \ 8649 uint16x4_t a_ = (a); \ 8650 uint32x4_t result; \ 8651 __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ 8652 : "=w"(result) \ 8653 : "w"(a_), "x"(b_), "i"(c) \ 8654 : /* No clobbers */); \ 8655 result; \ 8656 }) 8657 8658 #define vmull_lane_u32(a, b, c) \ 8659 __extension__ \ 8660 ({ \ 8661 uint32x2_t b_ = (b); \ 8662 uint32x2_t a_ = (a); \ 8663 uint64x2_t result; \ 8664 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ 8665 : "=w"(result) \ 8666 : "w"(a_), "w"(b_), "i"(c) \ 8667 : /* No clobbers */); \ 8668 result; \ 8669 }) 8670 8671 #define vmull_laneq_s16(a, b, c) \ 8672 __extension__ \ 8673 ({ \ 8674 int16x8_t b_ = (b); \ 8675 int16x4_t a_ = (a); \ 8676 int32x4_t result; \ 8677 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ 8678 : "=w"(result) \ 8679 : "w"(a_), "x"(b_), "i"(c) \ 8680 : /* No clobbers */); \ 8681 result; \ 8682 }) 8683 8684 #define vmull_laneq_s32(a, b, c) \ 8685 __extension__ \ 8686 ({ \ 8687 int32x4_t b_ = (b); \ 8688 int32x2_t a_ = (a); \ 8689 int64x2_t result; \ 8690 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \ 8691 : "=w"(result) \ 8692 : "w"(a_), "w"(b_), "i"(c) \ 8693 : /* No clobbers */); \ 8694 result; \ 8695 }) 8696 8697 #define vmull_laneq_u16(a, b, c) \ 8698 __extension__ \ 8699 ({ \ 8700 uint16x8_t b_ = (b); \ 8701 uint16x4_t a_ = (a); \ 8702 uint32x4_t result; \ 8703 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ 8704 : "=w"(result) \ 8705 : "w"(a_), "x"(b_), "i"(c) \ 8706 : /* No clobbers */); \ 8707 result; \ 8708 }) 8709 8710 #define vmull_laneq_u32(a, b, c) \ 8711 __extension__ \ 8712 ({ \ 8713 uint32x4_t b_ = (b); \ 8714 uint32x2_t a_ = (a); \ 8715 uint64x2_t result; \ 8716 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ 8717 : "=w"(result) \ 8718 : "w"(a_), "w"(b_), 
"i"(c) \ 8719 : /* No clobbers */); \ 8720 result; \ 8721 }) 8722 8723 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 8724 vmull_n_s16 (int16x4_t a, int16_t b) 8725 { 8726 int32x4_t result; 8727 __asm__ ("smull %0.4s,%1.4h,%2.h[0]" 8728 : "=w"(result) 8729 : "w"(a), "x"(b) 8730 : /* No clobbers */); 8731 return result; 8732 } 8733 8734 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 8735 vmull_n_s32 (int32x2_t a, int32_t b) 8736 { 8737 int64x2_t result; 8738 __asm__ ("smull %0.2d,%1.2s,%2.s[0]" 8739 : "=w"(result) 8740 : "w"(a), "w"(b) 8741 : /* No clobbers */); 8742 return result; 8743 } 8744 8745 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 8746 vmull_n_u16 (uint16x4_t a, uint16_t b) 8747 { 8748 uint32x4_t result; 8749 __asm__ ("umull %0.4s,%1.4h,%2.h[0]" 8750 : "=w"(result) 8751 : "w"(a), "x"(b) 8752 : /* No clobbers */); 8753 return result; 8754 } 8755 8756 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 8757 vmull_n_u32 (uint32x2_t a, uint32_t b) 8758 { 8759 uint64x2_t result; 8760 __asm__ ("umull %0.2d,%1.2s,%2.s[0]" 8761 : "=w"(result) 8762 : "w"(a), "w"(b) 8763 : /* No clobbers */); 8764 return result; 8765 } 8766 8767 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 8768 vmull_p8 (poly8x8_t a, poly8x8_t b) 8769 { 8770 poly16x8_t result; 8771 __asm__ ("pmull %0.8h, %1.8b, %2.8b" 8772 : "=w"(result) 8773 : "w"(a), "w"(b) 8774 : /* No clobbers */); 8775 return result; 8776 } 8777 8778 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 8779 vmull_s8 (int8x8_t a, int8x8_t b) 8780 { 8781 int16x8_t result; 8782 __asm__ ("smull %0.8h, %1.8b, %2.8b" 8783 : "=w"(result) 8784 : "w"(a), "w"(b) 8785 : /* No clobbers */); 8786 return result; 8787 } 8788 8789 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 8790 vmull_s16 (int16x4_t a, int16x4_t b) 8791 { 8792 int32x4_t 
result; 8793 __asm__ ("smull %0.4s, %1.4h, %2.4h" 8794 : "=w"(result) 8795 : "w"(a), "w"(b) 8796 : /* No clobbers */); 8797 return result; 8798 } 8799 8800 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 8801 vmull_s32 (int32x2_t a, int32x2_t b) 8802 { 8803 int64x2_t result; 8804 __asm__ ("smull %0.2d, %1.2s, %2.2s" 8805 : "=w"(result) 8806 : "w"(a), "w"(b) 8807 : /* No clobbers */); 8808 return result; 8809 } 8810 8811 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 8812 vmull_u8 (uint8x8_t a, uint8x8_t b) 8813 { 8814 uint16x8_t result; 8815 __asm__ ("umull %0.8h, %1.8b, %2.8b" 8816 : "=w"(result) 8817 : "w"(a), "w"(b) 8818 : /* No clobbers */); 8819 return result; 8820 } 8821 8822 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 8823 vmull_u16 (uint16x4_t a, uint16x4_t b) 8824 { 8825 uint32x4_t result; 8826 __asm__ ("umull %0.4s, %1.4h, %2.4h" 8827 : "=w"(result) 8828 : "w"(a), "w"(b) 8829 : /* No clobbers */); 8830 return result; 8831 } 8832 8833 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 8834 vmull_u32 (uint32x2_t a, uint32x2_t b) 8835 { 8836 uint64x2_t result; 8837 __asm__ ("umull %0.2d, %1.2s, %2.2s" 8838 : "=w"(result) 8839 : "w"(a), "w"(b) 8840 : /* No clobbers */); 8841 return result; 8842 } 8843 8844 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 8845 vmulq_n_f32 (float32x4_t a, float32_t b) 8846 { 8847 float32x4_t result; 8848 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]" 8849 : "=w"(result) 8850 : "w"(a), "w"(b) 8851 : /* No clobbers */); 8852 return result; 8853 } 8854 8855 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 8856 vmulq_n_f64 (float64x2_t a, float64_t b) 8857 { 8858 float64x2_t result; 8859 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]" 8860 : "=w"(result) 8861 : "w"(a), "w"(b) 8862 : /* No clobbers */); 8863 return result; 8864 } 8865 8866 __extension__ static __inline int16x8_t 
__attribute__ ((__always_inline__)) 8867 vmulq_n_s16 (int16x8_t a, int16_t b) 8868 { 8869 int16x8_t result; 8870 __asm__ ("mul %0.8h,%1.8h,%2.h[0]" 8871 : "=w"(result) 8872 : "w"(a), "x"(b) 8873 : /* No clobbers */); 8874 return result; 8875 } 8876 8877 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 8878 vmulq_n_s32 (int32x4_t a, int32_t b) 8879 { 8880 int32x4_t result; 8881 __asm__ ("mul %0.4s,%1.4s,%2.s[0]" 8882 : "=w"(result) 8883 : "w"(a), "w"(b) 8884 : /* No clobbers */); 8885 return result; 8886 } 8887 8888 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 8889 vmulq_n_u16 (uint16x8_t a, uint16_t b) 8890 { 8891 uint16x8_t result; 8892 __asm__ ("mul %0.8h,%1.8h,%2.h[0]" 8893 : "=w"(result) 8894 : "w"(a), "x"(b) 8895 : /* No clobbers */); 8896 return result; 8897 } 8898 8899 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 8900 vmulq_n_u32 (uint32x4_t a, uint32_t b) 8901 { 8902 uint32x4_t result; 8903 __asm__ ("mul %0.4s,%1.4s,%2.s[0]" 8904 : "=w"(result) 8905 : "w"(a), "w"(b) 8906 : /* No clobbers */); 8907 return result; 8908 } 8909 8910 #define vmuls_lane_f32(a, b, c) \ 8911 __extension__ \ 8912 ({ \ 8913 float32x4_t b_ = (b); \ 8914 float32_t a_ = (a); \ 8915 float32_t result; \ 8916 __asm__ ("fmul %s0,%s1,%2.s[%3]" \ 8917 : "=w"(result) \ 8918 : "w"(a_), "w"(b_), "i"(c) \ 8919 : /* No clobbers */); \ 8920 result; \ 8921 }) 8922 8923 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 8924 vmulx_f32 (float32x2_t a, float32x2_t b) 8925 { 8926 float32x2_t result; 8927 __asm__ ("fmulx %0.2s,%1.2s,%2.2s" 8928 : "=w"(result) 8929 : "w"(a), "w"(b) 8930 : /* No clobbers */); 8931 return result; 8932 } 8933 8934 #define vmulx_lane_f32(a, b, c) \ 8935 __extension__ \ 8936 ({ \ 8937 float32x4_t b_ = (b); \ 8938 float32x2_t a_ = (a); \ 8939 float32x2_t result; \ 8940 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \ 8941 : "=w"(result) \ 8942 : "w"(a_), "w"(b_), "i"(c) 
\ 8943 : /* No clobbers */); \ 8944 result; \ 8945 }) 8946 8947 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 8948 vmulxd_f64 (float64_t a, float64_t b) 8949 { 8950 float64_t result; 8951 __asm__ ("fmulx %d0, %d1, %d2" 8952 : "=w"(result) 8953 : "w"(a), "w"(b) 8954 : /* No clobbers */); 8955 return result; 8956 } 8957 8958 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 8959 vmulxq_f32 (float32x4_t a, float32x4_t b) 8960 { 8961 float32x4_t result; 8962 __asm__ ("fmulx %0.4s,%1.4s,%2.4s" 8963 : "=w"(result) 8964 : "w"(a), "w"(b) 8965 : /* No clobbers */); 8966 return result; 8967 } 8968 8969 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 8970 vmulxq_f64 (float64x2_t a, float64x2_t b) 8971 { 8972 float64x2_t result; 8973 __asm__ ("fmulx %0.2d,%1.2d,%2.2d" 8974 : "=w"(result) 8975 : "w"(a), "w"(b) 8976 : /* No clobbers */); 8977 return result; 8978 } 8979 8980 #define vmulxq_lane_f32(a, b, c) \ 8981 __extension__ \ 8982 ({ \ 8983 float32x4_t b_ = (b); \ 8984 float32x4_t a_ = (a); \ 8985 float32x4_t result; \ 8986 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \ 8987 : "=w"(result) \ 8988 : "w"(a_), "w"(b_), "i"(c) \ 8989 : /* No clobbers */); \ 8990 result; \ 8991 }) 8992 8993 #define vmulxq_lane_f64(a, b, c) \ 8994 __extension__ \ 8995 ({ \ 8996 float64x2_t b_ = (b); \ 8997 float64x2_t a_ = (a); \ 8998 float64x2_t result; \ 8999 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \ 9000 : "=w"(result) \ 9001 : "w"(a_), "w"(b_), "i"(c) \ 9002 : /* No clobbers */); \ 9003 result; \ 9004 }) 9005 9006 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 9007 vmulxs_f32 (float32_t a, float32_t b) 9008 { 9009 float32_t result; 9010 __asm__ ("fmulx %s0, %s1, %s2" 9011 : "=w"(result) 9012 : "w"(a), "w"(b) 9013 : /* No clobbers */); 9014 return result; 9015 } 9016 9017 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 9018 vmvn_p8 (poly8x8_t a) 9019 { 9020 
poly8x8_t result; 9021 __asm__ ("mvn %0.8b,%1.8b" 9022 : "=w"(result) 9023 : "w"(a) 9024 : /* No clobbers */); 9025 return result; 9026 } 9027 9028 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 9029 vmvn_s8 (int8x8_t a) 9030 { 9031 int8x8_t result; 9032 __asm__ ("mvn %0.8b,%1.8b" 9033 : "=w"(result) 9034 : "w"(a) 9035 : /* No clobbers */); 9036 return result; 9037 } 9038 9039 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 9040 vmvn_s16 (int16x4_t a) 9041 { 9042 int16x4_t result; 9043 __asm__ ("mvn %0.8b,%1.8b" 9044 : "=w"(result) 9045 : "w"(a) 9046 : /* No clobbers */); 9047 return result; 9048 } 9049 9050 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 9051 vmvn_s32 (int32x2_t a) 9052 { 9053 int32x2_t result; 9054 __asm__ ("mvn %0.8b,%1.8b" 9055 : "=w"(result) 9056 : "w"(a) 9057 : /* No clobbers */); 9058 return result; 9059 } 9060 9061 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 9062 vmvn_u8 (uint8x8_t a) 9063 { 9064 uint8x8_t result; 9065 __asm__ ("mvn %0.8b,%1.8b" 9066 : "=w"(result) 9067 : "w"(a) 9068 : /* No clobbers */); 9069 return result; 9070 } 9071 9072 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 9073 vmvn_u16 (uint16x4_t a) 9074 { 9075 uint16x4_t result; 9076 __asm__ ("mvn %0.8b,%1.8b" 9077 : "=w"(result) 9078 : "w"(a) 9079 : /* No clobbers */); 9080 return result; 9081 } 9082 9083 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 9084 vmvn_u32 (uint32x2_t a) 9085 { 9086 uint32x2_t result; 9087 __asm__ ("mvn %0.8b,%1.8b" 9088 : "=w"(result) 9089 : "w"(a) 9090 : /* No clobbers */); 9091 return result; 9092 } 9093 9094 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 9095 vmvnq_p8 (poly8x16_t a) 9096 { 9097 poly8x16_t result; 9098 __asm__ ("mvn %0.16b,%1.16b" 9099 : "=w"(result) 9100 : "w"(a) 9101 : /* No clobbers */); 9102 return result; 9103 } 9104 
9105 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 9106 vmvnq_s8 (int8x16_t a) 9107 { 9108 int8x16_t result; 9109 __asm__ ("mvn %0.16b,%1.16b" 9110 : "=w"(result) 9111 : "w"(a) 9112 : /* No clobbers */); 9113 return result; 9114 } 9115 9116 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 9117 vmvnq_s16 (int16x8_t a) 9118 { 9119 int16x8_t result; 9120 __asm__ ("mvn %0.16b,%1.16b" 9121 : "=w"(result) 9122 : "w"(a) 9123 : /* No clobbers */); 9124 return result; 9125 } 9126 9127 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 9128 vmvnq_s32 (int32x4_t a) 9129 { 9130 int32x4_t result; 9131 __asm__ ("mvn %0.16b,%1.16b" 9132 : "=w"(result) 9133 : "w"(a) 9134 : /* No clobbers */); 9135 return result; 9136 } 9137 9138 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 9139 vmvnq_u8 (uint8x16_t a) 9140 { 9141 uint8x16_t result; 9142 __asm__ ("mvn %0.16b,%1.16b" 9143 : "=w"(result) 9144 : "w"(a) 9145 : /* No clobbers */); 9146 return result; 9147 } 9148 9149 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 9150 vmvnq_u16 (uint16x8_t a) 9151 { 9152 uint16x8_t result; 9153 __asm__ ("mvn %0.16b,%1.16b" 9154 : "=w"(result) 9155 : "w"(a) 9156 : /* No clobbers */); 9157 return result; 9158 } 9159 9160 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 9161 vmvnq_u32 (uint32x4_t a) 9162 { 9163 uint32x4_t result; 9164 __asm__ ("mvn %0.16b,%1.16b" 9165 : "=w"(result) 9166 : "w"(a) 9167 : /* No clobbers */); 9168 return result; 9169 } 9170 9171 9172 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 9173 vpadal_s8 (int16x4_t a, int8x8_t b) 9174 { 9175 int16x4_t result; 9176 __asm__ ("sadalp %0.4h,%2.8b" 9177 : "=w"(result) 9178 : "0"(a), "w"(b) 9179 : /* No clobbers */); 9180 return result; 9181 } 9182 9183 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 9184 vpadal_s16 
(int32x2_t a, int16x4_t b)
{
  /* SADALP: pairs of B's halves are summed and added into A's
     (twice as wide) elements.  */
  int32x2_t result;
  __asm__ ("sadalp %0.2s,%2.4h"
           : "=w"(result)
           /* "0" ties the accumulator input to the output register,
              since [SU]ADALP accumulates in place.  */
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpadal_* / vpadalq_*: pairwise add and accumulate long.  Each adjacent
   pair of elements of B is summed and added to the corresponding element
   of the wider accumulator A ([SU]ADALP).  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpadal_s32 (int64x1_t a, int32x2_t b)
{
  int64x1_t result;
  __asm__ ("sadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadal_u8 (uint16x4_t a, uint8x8_t b)
{
  uint16x4_t result;
  __asm__ ("uadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadal_u16 (uint32x2_t a, uint16x4_t b)
{
  uint32x2_t result;
  __asm__ ("uadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpadal_u32 (uint64x1_t a, uint32x2_t b)
{
  uint64x1_t result;
  __asm__ ("uadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpadalq_s8 (int16x8_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpadalq_s16 (int32x4_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpadalq_s32 (int64x2_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpadalq_u8 (uint16x8_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpadalq_u16 (uint32x4_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpadalq_u32 (uint64x2_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpadd_*: pairwise add — adjacent element pairs of the concatenation
   of A and B are summed (FADDP/ADDP).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpadd_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("faddp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* The integer d-register forms go through compiler builtins rather than
   inline asm; unsigned variants reuse the signed builtin via casts, which
   is safe because ADDP is sign-agnostic.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_addpv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_addpv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_addpv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

/* Pairwise-add reduction of a two-element vector to a scalar.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpaddd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("faddp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vpaddl_* / vpaddlq_*: pairwise add long — adjacent pairs are summed
   into elements twice as wide, without accumulation ([SU]ADDLP).  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpaddl_s8 (int8x8_t a)
{
  int16x4_t result;
  __asm__ ("saddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpaddl_s16 (int16x4_t a)
{
  int32x2_t result;
  __asm__ ("saddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpaddl_s32 (int32x2_t a)
{
  int64x1_t result;
  __asm__ ("saddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpaddl_u8 (uint8x8_t a)
{
  uint16x4_t result;
  __asm__ ("uaddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpaddl_u16 (uint16x4_t a)
{
  uint32x2_t result;
  __asm__ ("uaddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpaddl_u32 (uint32x2_t a)
{
  uint64x1_t result;
  __asm__ ("uaddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddlq_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("saddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddlq_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("saddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddlq_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("saddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddlq_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("uaddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddlq_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("uaddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddlq_u32 (uint32x4_t a)
{
  /* UADDLP: unsigned pairwise add long (no accumulation).  */
  uint64x2_t result;
  __asm__ ("uaddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vpaddq_*: full-width (128-bit) pairwise add of A:B (FADDP/ADDP).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpaddq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("faddp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpaddq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("faddp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpaddq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddq_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* ADDP is sign-agnostic, so the unsigned forms use the same mnemonic.  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpaddq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddq_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise-add reduction of a two-element float vector to a scalar.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpadds_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("faddp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vpmax_*: pairwise maximum of adjacent element pairs
   (FMAXP/SMAXP/UMAXP).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmax_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmax_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmax_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmax_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmax_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmax_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmax_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmaxnm*: pairwise maximum using FMAXNMP ("maxNum" NaN handling).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmaxnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpmaxq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__
static __inline int16x8_t __attribute__ ((__always_inline__))
vpmaxq_s16 (int16x8_t a, int16x8_t b)
{
  /* SMAXP: signed pairwise maximum, full width.  */
  int16x8_t result;
  __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpmaxq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise-max reductions of a two-element vector to a scalar.  */

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxs_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vpmin_*: pairwise minimum of adjacent element pairs
   (FMINP/SMINP/UMINP).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmin_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmin_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("sminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmin_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("sminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmin_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("sminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmin_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmin_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmin_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpminnm*: pairwise minimum using FMINNMP ("minNum" NaN handling).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpminnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpminnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpminq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpminq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpminq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpminq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpminq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpminq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Pairwise-min reductions of a two-element vector to a scalar.  */

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmins_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vq(r)dmulh(q)_n_*: saturating (rounding) doubling multiply, returning
   the high half, with the scalar B broadcast via lane-0 indexing.  The
   "x" constraint on the 16-bit scalar restricts it to v0-v15, the only
   registers the h-lane indexed form of SQ(R)DMULH can address; the s-lane
   form can use any register, hence plain "w" there.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vqmovn_high_*: saturating narrow of B written into the high half of the
   result; the low half is A.  The destination is pre-built with A in the
   low half, and "+w" is used because [SU]QXTN2 only writes the high half,
   so the asm both reads and writes the output register.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vqmovun_high_*: as above but signed-to-unsigned saturating narrow
   (SQXTUN2).  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* The *_high_n shift-narrow intrinsics must be macros, not functions: the
   shift count is emitted through the "i" constraint, which requires a
   compile-time constant.  Each builds the destination with A in the low
   half and narrows B into the high half ("+w": read-modify-write).  */

#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrun_high_n_*: saturating rounded shift right, signed input
   narrowed to unsigned (SQRSHRUN2), high-half forms.  */

#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqshrn_high_n_* / vqshrun_high_n_*: non-rounding variants of the
   saturating shift-right-narrow high-half macros above.  */

#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrbit*: reverse the bit order within each byte (RBIT).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrbit_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rbit %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrbit_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rbit %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrbitq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rbit %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrbitq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rbit %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vrecpe(q)_u32: unsigned reciprocal estimate (URECPE).  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrecpe_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("urecpe %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrecpeq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("urecpe %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vrev16_*: reverse the byte order within each 16-bit halfword (REV16).  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev16_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev16_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev16_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__
((__always_inline__)) 10659 vrev16q_p8 (poly8x16_t a) 10660 { 10661 poly8x16_t result; 10662 __asm__ ("rev16 %0.16b,%1.16b" 10663 : "=w"(result) 10664 : "w"(a) 10665 : /* No clobbers */); 10666 return result; 10667 } 10668 10669 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 10670 vrev16q_s8 (int8x16_t a) 10671 { 10672 int8x16_t result; 10673 __asm__ ("rev16 %0.16b,%1.16b" 10674 : "=w"(result) 10675 : "w"(a) 10676 : /* No clobbers */); 10677 return result; 10678 } 10679 10680 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 10681 vrev16q_u8 (uint8x16_t a) 10682 { 10683 uint8x16_t result; 10684 __asm__ ("rev16 %0.16b,%1.16b" 10685 : "=w"(result) 10686 : "w"(a) 10687 : /* No clobbers */); 10688 return result; 10689 } 10690 10691 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 10692 vrev32_p8 (poly8x8_t a) 10693 { 10694 poly8x8_t result; 10695 __asm__ ("rev32 %0.8b,%1.8b" 10696 : "=w"(result) 10697 : "w"(a) 10698 : /* No clobbers */); 10699 return result; 10700 } 10701 10702 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 10703 vrev32_p16 (poly16x4_t a) 10704 { 10705 poly16x4_t result; 10706 __asm__ ("rev32 %0.4h,%1.4h" 10707 : "=w"(result) 10708 : "w"(a) 10709 : /* No clobbers */); 10710 return result; 10711 } 10712 10713 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 10714 vrev32_s8 (int8x8_t a) 10715 { 10716 int8x8_t result; 10717 __asm__ ("rev32 %0.8b,%1.8b" 10718 : "=w"(result) 10719 : "w"(a) 10720 : /* No clobbers */); 10721 return result; 10722 } 10723 10724 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 10725 vrev32_s16 (int16x4_t a) 10726 { 10727 int16x4_t result; 10728 __asm__ ("rev32 %0.4h,%1.4h" 10729 : "=w"(result) 10730 : "w"(a) 10731 : /* No clobbers */); 10732 return result; 10733 } 10734 10735 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 10736 vrev32_u8 
(uint8x8_t a) 10737 { 10738 uint8x8_t result; 10739 __asm__ ("rev32 %0.8b,%1.8b" 10740 : "=w"(result) 10741 : "w"(a) 10742 : /* No clobbers */); 10743 return result; 10744 } 10745 10746 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 10747 vrev32_u16 (uint16x4_t a) 10748 { 10749 uint16x4_t result; 10750 __asm__ ("rev32 %0.4h,%1.4h" 10751 : "=w"(result) 10752 : "w"(a) 10753 : /* No clobbers */); 10754 return result; 10755 } 10756 10757 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 10758 vrev32q_p8 (poly8x16_t a) 10759 { 10760 poly8x16_t result; 10761 __asm__ ("rev32 %0.16b,%1.16b" 10762 : "=w"(result) 10763 : "w"(a) 10764 : /* No clobbers */); 10765 return result; 10766 } 10767 10768 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 10769 vrev32q_p16 (poly16x8_t a) 10770 { 10771 poly16x8_t result; 10772 __asm__ ("rev32 %0.8h,%1.8h" 10773 : "=w"(result) 10774 : "w"(a) 10775 : /* No clobbers */); 10776 return result; 10777 } 10778 10779 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 10780 vrev32q_s8 (int8x16_t a) 10781 { 10782 int8x16_t result; 10783 __asm__ ("rev32 %0.16b,%1.16b" 10784 : "=w"(result) 10785 : "w"(a) 10786 : /* No clobbers */); 10787 return result; 10788 } 10789 10790 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 10791 vrev32q_s16 (int16x8_t a) 10792 { 10793 int16x8_t result; 10794 __asm__ ("rev32 %0.8h,%1.8h" 10795 : "=w"(result) 10796 : "w"(a) 10797 : /* No clobbers */); 10798 return result; 10799 } 10800 10801 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 10802 vrev32q_u8 (uint8x16_t a) 10803 { 10804 uint8x16_t result; 10805 __asm__ ("rev32 %0.16b,%1.16b" 10806 : "=w"(result) 10807 : "w"(a) 10808 : /* No clobbers */); 10809 return result; 10810 } 10811 10812 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 10813 vrev32q_u16 (uint16x8_t a) 10814 { 
10815 uint16x8_t result; 10816 __asm__ ("rev32 %0.8h,%1.8h" 10817 : "=w"(result) 10818 : "w"(a) 10819 : /* No clobbers */); 10820 return result; 10821 } 10822 10823 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 10824 vrev64_f32 (float32x2_t a) 10825 { 10826 float32x2_t result; 10827 __asm__ ("rev64 %0.2s,%1.2s" 10828 : "=w"(result) 10829 : "w"(a) 10830 : /* No clobbers */); 10831 return result; 10832 } 10833 10834 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 10835 vrev64_p8 (poly8x8_t a) 10836 { 10837 poly8x8_t result; 10838 __asm__ ("rev64 %0.8b,%1.8b" 10839 : "=w"(result) 10840 : "w"(a) 10841 : /* No clobbers */); 10842 return result; 10843 } 10844 10845 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) 10846 vrev64_p16 (poly16x4_t a) 10847 { 10848 poly16x4_t result; 10849 __asm__ ("rev64 %0.4h,%1.4h" 10850 : "=w"(result) 10851 : "w"(a) 10852 : /* No clobbers */); 10853 return result; 10854 } 10855 10856 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 10857 vrev64_s8 (int8x8_t a) 10858 { 10859 int8x8_t result; 10860 __asm__ ("rev64 %0.8b,%1.8b" 10861 : "=w"(result) 10862 : "w"(a) 10863 : /* No clobbers */); 10864 return result; 10865 } 10866 10867 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 10868 vrev64_s16 (int16x4_t a) 10869 { 10870 int16x4_t result; 10871 __asm__ ("rev64 %0.4h,%1.4h" 10872 : "=w"(result) 10873 : "w"(a) 10874 : /* No clobbers */); 10875 return result; 10876 } 10877 10878 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 10879 vrev64_s32 (int32x2_t a) 10880 { 10881 int32x2_t result; 10882 __asm__ ("rev64 %0.2s,%1.2s" 10883 : "=w"(result) 10884 : "w"(a) 10885 : /* No clobbers */); 10886 return result; 10887 } 10888 10889 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 10890 vrev64_u8 (uint8x8_t a) 10891 { 10892 uint8x8_t result; 10893 __asm__ 
("rev64 %0.8b,%1.8b" 10894 : "=w"(result) 10895 : "w"(a) 10896 : /* No clobbers */); 10897 return result; 10898 } 10899 10900 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 10901 vrev64_u16 (uint16x4_t a) 10902 { 10903 uint16x4_t result; 10904 __asm__ ("rev64 %0.4h,%1.4h" 10905 : "=w"(result) 10906 : "w"(a) 10907 : /* No clobbers */); 10908 return result; 10909 } 10910 10911 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 10912 vrev64_u32 (uint32x2_t a) 10913 { 10914 uint32x2_t result; 10915 __asm__ ("rev64 %0.2s,%1.2s" 10916 : "=w"(result) 10917 : "w"(a) 10918 : /* No clobbers */); 10919 return result; 10920 } 10921 10922 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 10923 vrev64q_f32 (float32x4_t a) 10924 { 10925 float32x4_t result; 10926 __asm__ ("rev64 %0.4s,%1.4s" 10927 : "=w"(result) 10928 : "w"(a) 10929 : /* No clobbers */); 10930 return result; 10931 } 10932 10933 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 10934 vrev64q_p8 (poly8x16_t a) 10935 { 10936 poly8x16_t result; 10937 __asm__ ("rev64 %0.16b,%1.16b" 10938 : "=w"(result) 10939 : "w"(a) 10940 : /* No clobbers */); 10941 return result; 10942 } 10943 10944 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 10945 vrev64q_p16 (poly16x8_t a) 10946 { 10947 poly16x8_t result; 10948 __asm__ ("rev64 %0.8h,%1.8h" 10949 : "=w"(result) 10950 : "w"(a) 10951 : /* No clobbers */); 10952 return result; 10953 } 10954 10955 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 10956 vrev64q_s8 (int8x16_t a) 10957 { 10958 int8x16_t result; 10959 __asm__ ("rev64 %0.16b,%1.16b" 10960 : "=w"(result) 10961 : "w"(a) 10962 : /* No clobbers */); 10963 return result; 10964 } 10965 10966 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 10967 vrev64q_s16 (int16x8_t a) 10968 { 10969 int16x8_t result; 10970 __asm__ ("rev64 %0.8h,%1.8h" 
10971 : "=w"(result) 10972 : "w"(a) 10973 : /* No clobbers */); 10974 return result; 10975 } 10976 10977 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 10978 vrev64q_s32 (int32x4_t a) 10979 { 10980 int32x4_t result; 10981 __asm__ ("rev64 %0.4s,%1.4s" 10982 : "=w"(result) 10983 : "w"(a) 10984 : /* No clobbers */); 10985 return result; 10986 } 10987 10988 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 10989 vrev64q_u8 (uint8x16_t a) 10990 { 10991 uint8x16_t result; 10992 __asm__ ("rev64 %0.16b,%1.16b" 10993 : "=w"(result) 10994 : "w"(a) 10995 : /* No clobbers */); 10996 return result; 10997 } 10998 10999 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 11000 vrev64q_u16 (uint16x8_t a) 11001 { 11002 uint16x8_t result; 11003 __asm__ ("rev64 %0.8h,%1.8h" 11004 : "=w"(result) 11005 : "w"(a) 11006 : /* No clobbers */); 11007 return result; 11008 } 11009 11010 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 11011 vrev64q_u32 (uint32x4_t a) 11012 { 11013 uint32x4_t result; 11014 __asm__ ("rev64 %0.4s,%1.4s" 11015 : "=w"(result) 11016 : "w"(a) 11017 : /* No clobbers */); 11018 return result; 11019 } 11020 11021 #define vrshrn_high_n_s16(a, b, c) \ 11022 __extension__ \ 11023 ({ \ 11024 int16x8_t b_ = (b); \ 11025 int8x8_t a_ = (a); \ 11026 int8x16_t result = vcombine_s8 \ 11027 (a_, vcreate_s8 \ 11028 (__AARCH64_UINT64_C (0x0))); \ 11029 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ 11030 : "+w"(result) \ 11031 : "w"(b_), "i"(c) \ 11032 : /* No clobbers */); \ 11033 result; \ 11034 }) 11035 11036 #define vrshrn_high_n_s32(a, b, c) \ 11037 __extension__ \ 11038 ({ \ 11039 int32x4_t b_ = (b); \ 11040 int16x4_t a_ = (a); \ 11041 int16x8_t result = vcombine_s16 \ 11042 (a_, vcreate_s16 \ 11043 (__AARCH64_UINT64_C (0x0))); \ 11044 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ 11045 : "+w"(result) \ 11046 : "w"(b_), "i"(c) \ 11047 : /* No clobbers */); \ 11048 result; \ 11049 }) 11050 
11051 #define vrshrn_high_n_s64(a, b, c) \ 11052 __extension__ \ 11053 ({ \ 11054 int64x2_t b_ = (b); \ 11055 int32x2_t a_ = (a); \ 11056 int32x4_t result = vcombine_s32 \ 11057 (a_, vcreate_s32 \ 11058 (__AARCH64_UINT64_C (0x0))); \ 11059 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ 11060 : "+w"(result) \ 11061 : "w"(b_), "i"(c) \ 11062 : /* No clobbers */); \ 11063 result; \ 11064 }) 11065 11066 #define vrshrn_high_n_u16(a, b, c) \ 11067 __extension__ \ 11068 ({ \ 11069 uint16x8_t b_ = (b); \ 11070 uint8x8_t a_ = (a); \ 11071 uint8x16_t result = vcombine_u8 \ 11072 (a_, vcreate_u8 \ 11073 (__AARCH64_UINT64_C (0x0))); \ 11074 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ 11075 : "+w"(result) \ 11076 : "w"(b_), "i"(c) \ 11077 : /* No clobbers */); \ 11078 result; \ 11079 }) 11080 11081 #define vrshrn_high_n_u32(a, b, c) \ 11082 __extension__ \ 11083 ({ \ 11084 uint32x4_t b_ = (b); \ 11085 uint16x4_t a_ = (a); \ 11086 uint16x8_t result = vcombine_u16 \ 11087 (a_, vcreate_u16 \ 11088 (__AARCH64_UINT64_C (0x0))); \ 11089 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ 11090 : "+w"(result) \ 11091 : "w"(b_), "i"(c) \ 11092 : /* No clobbers */); \ 11093 result; \ 11094 }) 11095 11096 #define vrshrn_high_n_u64(a, b, c) \ 11097 __extension__ \ 11098 ({ \ 11099 uint64x2_t b_ = (b); \ 11100 uint32x2_t a_ = (a); \ 11101 uint32x4_t result = vcombine_u32 \ 11102 (a_, vcreate_u32 \ 11103 (__AARCH64_UINT64_C (0x0))); \ 11104 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ 11105 : "+w"(result) \ 11106 : "w"(b_), "i"(c) \ 11107 : /* No clobbers */); \ 11108 result; \ 11109 }) 11110 11111 #define vrshrn_n_s16(a, b) \ 11112 __extension__ \ 11113 ({ \ 11114 int16x8_t a_ = (a); \ 11115 int8x8_t result; \ 11116 __asm__ ("rshrn %0.8b,%1.8h,%2" \ 11117 : "=w"(result) \ 11118 : "w"(a_), "i"(b) \ 11119 : /* No clobbers */); \ 11120 result; \ 11121 }) 11122 11123 #define vrshrn_n_s32(a, b) \ 11124 __extension__ \ 11125 ({ \ 11126 int32x4_t a_ = (a); \ 11127 int16x4_t result; \ 11128 __asm__ ("rshrn %0.4h,%1.4s,%2" \ 11129 : 
"=w"(result) \ 11130 : "w"(a_), "i"(b) \ 11131 : /* No clobbers */); \ 11132 result; \ 11133 }) 11134 11135 #define vrshrn_n_s64(a, b) \ 11136 __extension__ \ 11137 ({ \ 11138 int64x2_t a_ = (a); \ 11139 int32x2_t result; \ 11140 __asm__ ("rshrn %0.2s,%1.2d,%2" \ 11141 : "=w"(result) \ 11142 : "w"(a_), "i"(b) \ 11143 : /* No clobbers */); \ 11144 result; \ 11145 }) 11146 11147 #define vrshrn_n_u16(a, b) \ 11148 __extension__ \ 11149 ({ \ 11150 uint16x8_t a_ = (a); \ 11151 uint8x8_t result; \ 11152 __asm__ ("rshrn %0.8b,%1.8h,%2" \ 11153 : "=w"(result) \ 11154 : "w"(a_), "i"(b) \ 11155 : /* No clobbers */); \ 11156 result; \ 11157 }) 11158 11159 #define vrshrn_n_u32(a, b) \ 11160 __extension__ \ 11161 ({ \ 11162 uint32x4_t a_ = (a); \ 11163 uint16x4_t result; \ 11164 __asm__ ("rshrn %0.4h,%1.4s,%2" \ 11165 : "=w"(result) \ 11166 : "w"(a_), "i"(b) \ 11167 : /* No clobbers */); \ 11168 result; \ 11169 }) 11170 11171 #define vrshrn_n_u64(a, b) \ 11172 __extension__ \ 11173 ({ \ 11174 uint64x2_t a_ = (a); \ 11175 uint32x2_t result; \ 11176 __asm__ ("rshrn %0.2s,%1.2d,%2" \ 11177 : "=w"(result) \ 11178 : "w"(a_), "i"(b) \ 11179 : /* No clobbers */); \ 11180 result; \ 11181 }) 11182 11183 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 11184 vrsqrte_f32 (float32x2_t a) 11185 { 11186 float32x2_t result; 11187 __asm__ ("frsqrte %0.2s,%1.2s" 11188 : "=w"(result) 11189 : "w"(a) 11190 : /* No clobbers */); 11191 return result; 11192 } 11193 11194 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) 11195 vrsqrte_f64 (float64x1_t a) 11196 { 11197 float64x1_t result; 11198 __asm__ ("frsqrte %d0,%d1" 11199 : "=w"(result) 11200 : "w"(a) 11201 : /* No clobbers */); 11202 return result; 11203 } 11204 11205 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 11206 vrsqrte_u32 (uint32x2_t a) 11207 { 11208 uint32x2_t result; 11209 __asm__ ("ursqrte %0.2s,%1.2s" 11210 : "=w"(result) 11211 : "w"(a) 11212 : /* 
No clobbers */); 11213 return result; 11214 } 11215 11216 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 11217 vrsqrted_f64 (float64_t a) 11218 { 11219 float64_t result; 11220 __asm__ ("frsqrte %d0,%d1" 11221 : "=w"(result) 11222 : "w"(a) 11223 : /* No clobbers */); 11224 return result; 11225 } 11226 11227 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 11228 vrsqrteq_f32 (float32x4_t a) 11229 { 11230 float32x4_t result; 11231 __asm__ ("frsqrte %0.4s,%1.4s" 11232 : "=w"(result) 11233 : "w"(a) 11234 : /* No clobbers */); 11235 return result; 11236 } 11237 11238 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 11239 vrsqrteq_f64 (float64x2_t a) 11240 { 11241 float64x2_t result; 11242 __asm__ ("frsqrte %0.2d,%1.2d" 11243 : "=w"(result) 11244 : "w"(a) 11245 : /* No clobbers */); 11246 return result; 11247 } 11248 11249 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 11250 vrsqrteq_u32 (uint32x4_t a) 11251 { 11252 uint32x4_t result; 11253 __asm__ ("ursqrte %0.4s,%1.4s" 11254 : "=w"(result) 11255 : "w"(a) 11256 : /* No clobbers */); 11257 return result; 11258 } 11259 11260 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 11261 vrsqrtes_f32 (float32_t a) 11262 { 11263 float32_t result; 11264 __asm__ ("frsqrte %s0,%s1" 11265 : "=w"(result) 11266 : "w"(a) 11267 : /* No clobbers */); 11268 return result; 11269 } 11270 11271 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 11272 vrsqrts_f32 (float32x2_t a, float32x2_t b) 11273 { 11274 float32x2_t result; 11275 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s" 11276 : "=w"(result) 11277 : "w"(a), "w"(b) 11278 : /* No clobbers */); 11279 return result; 11280 } 11281 11282 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 11283 vrsqrtsd_f64 (float64_t a, float64_t b) 11284 { 11285 float64_t result; 11286 __asm__ ("frsqrts %d0,%d1,%d2" 11287 : 
"=w"(result) 11288 : "w"(a), "w"(b) 11289 : /* No clobbers */); 11290 return result; 11291 } 11292 11293 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 11294 vrsqrtsq_f32 (float32x4_t a, float32x4_t b) 11295 { 11296 float32x4_t result; 11297 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s" 11298 : "=w"(result) 11299 : "w"(a), "w"(b) 11300 : /* No clobbers */); 11301 return result; 11302 } 11303 11304 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 11305 vrsqrtsq_f64 (float64x2_t a, float64x2_t b) 11306 { 11307 float64x2_t result; 11308 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" 11309 : "=w"(result) 11310 : "w"(a), "w"(b) 11311 : /* No clobbers */); 11312 return result; 11313 } 11314 11315 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 11316 vrsqrtss_f32 (float32_t a, float32_t b) 11317 { 11318 float32_t result; 11319 __asm__ ("frsqrts %s0,%s1,%s2" 11320 : "=w"(result) 11321 : "w"(a), "w"(b) 11322 : /* No clobbers */); 11323 return result; 11324 } 11325 11326 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 11327 vrsrtsq_f64 (float64x2_t a, float64x2_t b) 11328 { 11329 float64x2_t result; 11330 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" 11331 : "=w"(result) 11332 : "w"(a), "w"(b) 11333 : /* No clobbers */); 11334 return result; 11335 } 11336 11337 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 11338 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) 11339 { 11340 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); 11341 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" 11342 : "+w"(result) 11343 : "w"(b), "w"(c) 11344 : /* No clobbers */); 11345 return result; 11346 } 11347 11348 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 11349 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) 11350 { 11351 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); 11352 __asm__ 
("rsubhn2 %0.8h, %1.4s, %2.4s" 11353 : "+w"(result) 11354 : "w"(b), "w"(c) 11355 : /* No clobbers */); 11356 return result; 11357 } 11358 11359 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 11360 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) 11361 { 11362 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); 11363 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" 11364 : "+w"(result) 11365 : "w"(b), "w"(c) 11366 : /* No clobbers */); 11367 return result; 11368 } 11369 11370 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 11371 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) 11372 { 11373 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); 11374 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" 11375 : "+w"(result) 11376 : "w"(b), "w"(c) 11377 : /* No clobbers */); 11378 return result; 11379 } 11380 11381 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 11382 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) 11383 { 11384 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); 11385 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" 11386 : "+w"(result) 11387 : "w"(b), "w"(c) 11388 : /* No clobbers */); 11389 return result; 11390 } 11391 11392 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 11393 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) 11394 { 11395 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); 11396 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" 11397 : "+w"(result) 11398 : "w"(b), "w"(c) 11399 : /* No clobbers */); 11400 return result; 11401 } 11402 11403 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 11404 vrsubhn_s16 (int16x8_t a, int16x8_t b) 11405 { 11406 int8x8_t result; 11407 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h" 11408 : "=w"(result) 11409 : "w"(a), "w"(b) 11410 : /* No clobbers */); 11411 return result; 
11412 } 11413 11414 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 11415 vrsubhn_s32 (int32x4_t a, int32x4_t b) 11416 { 11417 int16x4_t result; 11418 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s" 11419 : "=w"(result) 11420 : "w"(a), "w"(b) 11421 : /* No clobbers */); 11422 return result; 11423 } 11424 11425 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 11426 vrsubhn_s64 (int64x2_t a, int64x2_t b) 11427 { 11428 int32x2_t result; 11429 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d" 11430 : "=w"(result) 11431 : "w"(a), "w"(b) 11432 : /* No clobbers */); 11433 return result; 11434 } 11435 11436 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 11437 vrsubhn_u16 (uint16x8_t a, uint16x8_t b) 11438 { 11439 uint8x8_t result; 11440 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h" 11441 : "=w"(result) 11442 : "w"(a), "w"(b) 11443 : /* No clobbers */); 11444 return result; 11445 } 11446 11447 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 11448 vrsubhn_u32 (uint32x4_t a, uint32x4_t b) 11449 { 11450 uint16x4_t result; 11451 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s" 11452 : "=w"(result) 11453 : "w"(a), "w"(b) 11454 : /* No clobbers */); 11455 return result; 11456 } 11457 11458 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 11459 vrsubhn_u64 (uint64x2_t a, uint64x2_t b) 11460 { 11461 uint32x2_t result; 11462 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d" 11463 : "=w"(result) 11464 : "w"(a), "w"(b) 11465 : /* No clobbers */); 11466 return result; 11467 } 11468 11469 #define vset_lane_f32(a, b, c) \ 11470 __extension__ \ 11471 ({ \ 11472 float32x2_t b_ = (b); \ 11473 float32_t a_ = (a); \ 11474 float32x2_t result; \ 11475 __asm__ ("ins %0.s[%3], %w1" \ 11476 : "=w"(result) \ 11477 : "r"(a_), "0"(b_), "i"(c) \ 11478 : /* No clobbers */); \ 11479 result; \ 11480 }) 11481 11482 #define vset_lane_f64(a, b, c) \ 11483 __extension__ \ 11484 ({ \ 11485 float64x1_t b_ = (b); \ 11486 
float64_t a_ = (a); \ 11487 float64x1_t result; \ 11488 __asm__ ("ins %0.d[%3], %x1" \ 11489 : "=w"(result) \ 11490 : "r"(a_), "0"(b_), "i"(c) \ 11491 : /* No clobbers */); \ 11492 result; \ 11493 }) 11494 11495 #define vset_lane_p8(a, b, c) \ 11496 __extension__ \ 11497 ({ \ 11498 poly8x8_t b_ = (b); \ 11499 poly8_t a_ = (a); \ 11500 poly8x8_t result; \ 11501 __asm__ ("ins %0.b[%3], %w1" \ 11502 : "=w"(result) \ 11503 : "r"(a_), "0"(b_), "i"(c) \ 11504 : /* No clobbers */); \ 11505 result; \ 11506 }) 11507 11508 #define vset_lane_p16(a, b, c) \ 11509 __extension__ \ 11510 ({ \ 11511 poly16x4_t b_ = (b); \ 11512 poly16_t a_ = (a); \ 11513 poly16x4_t result; \ 11514 __asm__ ("ins %0.h[%3], %w1" \ 11515 : "=w"(result) \ 11516 : "r"(a_), "0"(b_), "i"(c) \ 11517 : /* No clobbers */); \ 11518 result; \ 11519 }) 11520 11521 #define vset_lane_s8(a, b, c) \ 11522 __extension__ \ 11523 ({ \ 11524 int8x8_t b_ = (b); \ 11525 int8_t a_ = (a); \ 11526 int8x8_t result; \ 11527 __asm__ ("ins %0.b[%3], %w1" \ 11528 : "=w"(result) \ 11529 : "r"(a_), "0"(b_), "i"(c) \ 11530 : /* No clobbers */); \ 11531 result; \ 11532 }) 11533 11534 #define vset_lane_s16(a, b, c) \ 11535 __extension__ \ 11536 ({ \ 11537 int16x4_t b_ = (b); \ 11538 int16_t a_ = (a); \ 11539 int16x4_t result; \ 11540 __asm__ ("ins %0.h[%3], %w1" \ 11541 : "=w"(result) \ 11542 : "r"(a_), "0"(b_), "i"(c) \ 11543 : /* No clobbers */); \ 11544 result; \ 11545 }) 11546 11547 #define vset_lane_s32(a, b, c) \ 11548 __extension__ \ 11549 ({ \ 11550 int32x2_t b_ = (b); \ 11551 int32_t a_ = (a); \ 11552 int32x2_t result; \ 11553 __asm__ ("ins %0.s[%3], %w1" \ 11554 : "=w"(result) \ 11555 : "r"(a_), "0"(b_), "i"(c) \ 11556 : /* No clobbers */); \ 11557 result; \ 11558 }) 11559 11560 #define vset_lane_s64(a, b, c) \ 11561 __extension__ \ 11562 ({ \ 11563 int64x1_t b_ = (b); \ 11564 int64_t a_ = (a); \ 11565 int64x1_t result; \ 11566 __asm__ ("ins %0.d[%3], %x1" \ 11567 : "=w"(result) \ 11568 : "r"(a_), "0"(b_), "i"(c) \ 11569 : 
/* No clobbers */); \ 11570 result; \ 11571 }) 11572 11573 #define vset_lane_u8(a, b, c) \ 11574 __extension__ \ 11575 ({ \ 11576 uint8x8_t b_ = (b); \ 11577 uint8_t a_ = (a); \ 11578 uint8x8_t result; \ 11579 __asm__ ("ins %0.b[%3], %w1" \ 11580 : "=w"(result) \ 11581 : "r"(a_), "0"(b_), "i"(c) \ 11582 : /* No clobbers */); \ 11583 result; \ 11584 }) 11585 11586 #define vset_lane_u16(a, b, c) \ 11587 __extension__ \ 11588 ({ \ 11589 uint16x4_t b_ = (b); \ 11590 uint16_t a_ = (a); \ 11591 uint16x4_t result; \ 11592 __asm__ ("ins %0.h[%3], %w1" \ 11593 : "=w"(result) \ 11594 : "r"(a_), "0"(b_), "i"(c) \ 11595 : /* No clobbers */); \ 11596 result; \ 11597 }) 11598 11599 #define vset_lane_u32(a, b, c) \ 11600 __extension__ \ 11601 ({ \ 11602 uint32x2_t b_ = (b); \ 11603 uint32_t a_ = (a); \ 11604 uint32x2_t result; \ 11605 __asm__ ("ins %0.s[%3], %w1" \ 11606 : "=w"(result) \ 11607 : "r"(a_), "0"(b_), "i"(c) \ 11608 : /* No clobbers */); \ 11609 result; \ 11610 }) 11611 11612 #define vset_lane_u64(a, b, c) \ 11613 __extension__ \ 11614 ({ \ 11615 uint64x1_t b_ = (b); \ 11616 uint64_t a_ = (a); \ 11617 uint64x1_t result; \ 11618 __asm__ ("ins %0.d[%3], %x1" \ 11619 : "=w"(result) \ 11620 : "r"(a_), "0"(b_), "i"(c) \ 11621 : /* No clobbers */); \ 11622 result; \ 11623 }) 11624 11625 #define vsetq_lane_f32(a, b, c) \ 11626 __extension__ \ 11627 ({ \ 11628 float32x4_t b_ = (b); \ 11629 float32_t a_ = (a); \ 11630 float32x4_t result; \ 11631 __asm__ ("ins %0.s[%3], %w1" \ 11632 : "=w"(result) \ 11633 : "r"(a_), "0"(b_), "i"(c) \ 11634 : /* No clobbers */); \ 11635 result; \ 11636 }) 11637 11638 #define vsetq_lane_f64(a, b, c) \ 11639 __extension__ \ 11640 ({ \ 11641 float64x2_t b_ = (b); \ 11642 float64_t a_ = (a); \ 11643 float64x2_t result; \ 11644 __asm__ ("ins %0.d[%3], %x1" \ 11645 : "=w"(result) \ 11646 : "r"(a_), "0"(b_), "i"(c) \ 11647 : /* No clobbers */); \ 11648 result; \ 11649 }) 11650 11651 #define vsetq_lane_p8(a, b, c) \ 11652 __extension__ \ 11653 ({ \ 
11654 poly8x16_t b_ = (b); \ 11655 poly8_t a_ = (a); \ 11656 poly8x16_t result; \ 11657 __asm__ ("ins %0.b[%3], %w1" \ 11658 : "=w"(result) \ 11659 : "r"(a_), "0"(b_), "i"(c) \ 11660 : /* No clobbers */); \ 11661 result; \ 11662 }) 11663 11664 #define vsetq_lane_p16(a, b, c) \ 11665 __extension__ \ 11666 ({ \ 11667 poly16x8_t b_ = (b); \ 11668 poly16_t a_ = (a); \ 11669 poly16x8_t result; \ 11670 __asm__ ("ins %0.h[%3], %w1" \ 11671 : "=w"(result) \ 11672 : "r"(a_), "0"(b_), "i"(c) \ 11673 : /* No clobbers */); \ 11674 result; \ 11675 }) 11676 11677 #define vsetq_lane_s8(a, b, c) \ 11678 __extension__ \ 11679 ({ \ 11680 int8x16_t b_ = (b); \ 11681 int8_t a_ = (a); \ 11682 int8x16_t result; \ 11683 __asm__ ("ins %0.b[%3], %w1" \ 11684 : "=w"(result) \ 11685 : "r"(a_), "0"(b_), "i"(c) \ 11686 : /* No clobbers */); \ 11687 result; \ 11688 }) 11689 11690 #define vsetq_lane_s16(a, b, c) \ 11691 __extension__ \ 11692 ({ \ 11693 int16x8_t b_ = (b); \ 11694 int16_t a_ = (a); \ 11695 int16x8_t result; \ 11696 __asm__ ("ins %0.h[%3], %w1" \ 11697 : "=w"(result) \ 11698 : "r"(a_), "0"(b_), "i"(c) \ 11699 : /* No clobbers */); \ 11700 result; \ 11701 }) 11702 11703 #define vsetq_lane_s32(a, b, c) \ 11704 __extension__ \ 11705 ({ \ 11706 int32x4_t b_ = (b); \ 11707 int32_t a_ = (a); \ 11708 int32x4_t result; \ 11709 __asm__ ("ins %0.s[%3], %w1" \ 11710 : "=w"(result) \ 11711 : "r"(a_), "0"(b_), "i"(c) \ 11712 : /* No clobbers */); \ 11713 result; \ 11714 }) 11715 11716 #define vsetq_lane_s64(a, b, c) \ 11717 __extension__ \ 11718 ({ \ 11719 int64x2_t b_ = (b); \ 11720 int64_t a_ = (a); \ 11721 int64x2_t result; \ 11722 __asm__ ("ins %0.d[%3], %x1" \ 11723 : "=w"(result) \ 11724 : "r"(a_), "0"(b_), "i"(c) \ 11725 : /* No clobbers */); \ 11726 result; \ 11727 }) 11728 11729 #define vsetq_lane_u8(a, b, c) \ 11730 __extension__ \ 11731 ({ \ 11732 uint8x16_t b_ = (b); \ 11733 uint8_t a_ = (a); \ 11734 uint8x16_t result; \ 11735 __asm__ ("ins %0.b[%3], %w1" \ 11736 : "=w"(result) \ 
11737 : "r"(a_), "0"(b_), "i"(c) \ 11738 : /* No clobbers */); \ 11739 result; \ 11740 }) 11741 11742 #define vsetq_lane_u16(a, b, c) \ 11743 __extension__ \ 11744 ({ \ 11745 uint16x8_t b_ = (b); \ 11746 uint16_t a_ = (a); \ 11747 uint16x8_t result; \ 11748 __asm__ ("ins %0.h[%3], %w1" \ 11749 : "=w"(result) \ 11750 : "r"(a_), "0"(b_), "i"(c) \ 11751 : /* No clobbers */); \ 11752 result; \ 11753 }) 11754 11755 #define vsetq_lane_u32(a, b, c) \ 11756 __extension__ \ 11757 ({ \ 11758 uint32x4_t b_ = (b); \ 11759 uint32_t a_ = (a); \ 11760 uint32x4_t result; \ 11761 __asm__ ("ins %0.s[%3], %w1" \ 11762 : "=w"(result) \ 11763 : "r"(a_), "0"(b_), "i"(c) \ 11764 : /* No clobbers */); \ 11765 result; \ 11766 }) 11767 11768 #define vsetq_lane_u64(a, b, c) \ 11769 __extension__ \ 11770 ({ \ 11771 uint64x2_t b_ = (b); \ 11772 uint64_t a_ = (a); \ 11773 uint64x2_t result; \ 11774 __asm__ ("ins %0.d[%3], %x1" \ 11775 : "=w"(result) \ 11776 : "r"(a_), "0"(b_), "i"(c) \ 11777 : /* No clobbers */); \ 11778 result; \ 11779 }) 11780 11781 #define vshrn_high_n_s16(a, b, c) \ 11782 __extension__ \ 11783 ({ \ 11784 int16x8_t b_ = (b); \ 11785 int8x8_t a_ = (a); \ 11786 int8x16_t result = vcombine_s8 \ 11787 (a_, vcreate_s8 \ 11788 (__AARCH64_UINT64_C (0x0))); \ 11789 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ 11790 : "+w"(result) \ 11791 : "w"(b_), "i"(c) \ 11792 : /* No clobbers */); \ 11793 result; \ 11794 }) 11795 11796 #define vshrn_high_n_s32(a, b, c) \ 11797 __extension__ \ 11798 ({ \ 11799 int32x4_t b_ = (b); \ 11800 int16x4_t a_ = (a); \ 11801 int16x8_t result = vcombine_s16 \ 11802 (a_, vcreate_s16 \ 11803 (__AARCH64_UINT64_C (0x0))); \ 11804 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ 11805 : "+w"(result) \ 11806 : "w"(b_), "i"(c) \ 11807 : /* No clobbers */); \ 11808 result; \ 11809 }) 11810 11811 #define vshrn_high_n_s64(a, b, c) \ 11812 __extension__ \ 11813 ({ \ 11814 int64x2_t b_ = (b); \ 11815 int32x2_t a_ = (a); \ 11816 int32x4_t result = vcombine_s32 \ 11817 (a_, vcreate_s32 \ 11818 
(__AARCH64_UINT64_C (0x0))); \ 11819 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ 11820 : "+w"(result) \ 11821 : "w"(b_), "i"(c) \ 11822 : /* No clobbers */); \ 11823 result; \ 11824 }) 11825 11826 #define vshrn_high_n_u16(a, b, c) \ 11827 __extension__ \ 11828 ({ \ 11829 uint16x8_t b_ = (b); \ 11830 uint8x8_t a_ = (a); \ 11831 uint8x16_t result = vcombine_u8 \ 11832 (a_, vcreate_u8 \ 11833 (__AARCH64_UINT64_C (0x0))); \ 11834 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ 11835 : "+w"(result) \ 11836 : "w"(b_), "i"(c) \ 11837 : /* No clobbers */); \ 11838 result; \ 11839 }) 11840 11841 #define vshrn_high_n_u32(a, b, c) \ 11842 __extension__ \ 11843 ({ \ 11844 uint32x4_t b_ = (b); \ 11845 uint16x4_t a_ = (a); \ 11846 uint16x8_t result = vcombine_u16 \ 11847 (a_, vcreate_u16 \ 11848 (__AARCH64_UINT64_C (0x0))); \ 11849 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ 11850 : "+w"(result) \ 11851 : "w"(b_), "i"(c) \ 11852 : /* No clobbers */); \ 11853 result; \ 11854 }) 11855 11856 #define vshrn_high_n_u64(a, b, c) \ 11857 __extension__ \ 11858 ({ \ 11859 uint64x2_t b_ = (b); \ 11860 uint32x2_t a_ = (a); \ 11861 uint32x4_t result = vcombine_u32 \ 11862 (a_, vcreate_u32 \ 11863 (__AARCH64_UINT64_C (0x0))); \ 11864 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ 11865 : "+w"(result) \ 11866 : "w"(b_), "i"(c) \ 11867 : /* No clobbers */); \ 11868 result; \ 11869 }) 11870 11871 #define vshrn_n_s16(a, b) \ 11872 __extension__ \ 11873 ({ \ 11874 int16x8_t a_ = (a); \ 11875 int8x8_t result; \ 11876 __asm__ ("shrn %0.8b,%1.8h,%2" \ 11877 : "=w"(result) \ 11878 : "w"(a_), "i"(b) \ 11879 : /* No clobbers */); \ 11880 result; \ 11881 }) 11882 11883 #define vshrn_n_s32(a, b) \ 11884 __extension__ \ 11885 ({ \ 11886 int32x4_t a_ = (a); \ 11887 int16x4_t result; \ 11888 __asm__ ("shrn %0.4h,%1.4s,%2" \ 11889 : "=w"(result) \ 11890 : "w"(a_), "i"(b) \ 11891 : /* No clobbers */); \ 11892 result; \ 11893 }) 11894 11895 #define vshrn_n_s64(a, b) \ 11896 __extension__ \ 11897 ({ \ 11898 int64x2_t a_ = (a); \ 11899 int32x2_t result; 
/* Narrowing shift right (SHRN), unsigned variants: shift each element
   of A right by the immediate B, narrowing each lane to half width.  */

#define vshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

/* Shift left and insert (SLI): each element of B is shifted left by the
   immediate C and inserted into the corresponding element of A, leaving
   the low C bits of each destination element unchanged.  The "0"
   constraint ties A to the output register so SLI's read-modify-write
   of the destination is modelled correctly.  */

#define vsli_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sli %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsli_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sli %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsliq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sli %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsliq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sli %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right and insert (SRI): each element of B is shifted right by
   the immediate C and inserted into the corresponding element of A,
   leaving the high C bits of each destination element unchanged.  The
   "0" constraint ties A to the output register because SRI reads its
   destination.  */

#define vsri_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sri %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsri_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sri %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsriq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sri %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsriq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sri %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* Store one lane (ST1, single structure): store lane C of the vector B
   to the address A.  The "memory" clobber tells GCC the asm writes to
   memory, so it cannot cache the stored-to location across the call.  */

#define vst1_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store one lane (ST1, single structure), remaining 64-bit variant.  */

#define vst1_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })


/* Store one lane of a 128-bit vector (ST1, single structure): store
   lane C of B to the address A.  "memory" clobber as above.  */

#define vst1q_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

/* Subtract and narrow, second part (SUBHN2): the high half of each
   B[i] - C[i] difference is narrowed and written into the high half of
   the result, whose low half is A.  The result register is pre-seeded
   with vcombine (a, 0) and tied with "+w" because SUBHN2 only writes
   the upper half of its destination.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* Subtract and narrow, second part (SUBHN2), remaining variants — see
   the comment on vsubhn_high_s16 for the "+w" tied-operand rationale.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

/* Subtract and narrow (SUBHN): return the high half of each element of
   the element-wise difference A - B.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsubhn_s16 (int16x8_t a, int16x8_t b)
{
  int8x8_t result;
  __asm__ ("subhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsubhn_s32 (int32x4_t a, int32x4_t b)
{
  int16x4_t result;
  __asm__ ("subhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsubhn_s64 (int64x2_t a, int64x2_t b)
{
  int32x2_t result;
  __asm__ ("subhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsubhn_u16 (uint16x8_t a, uint16x8_t b)
{
  uint8x8_t result;
  __asm__ ("subhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsubhn_u32 (uint32x4_t a, uint32x4_t b)
{
  uint16x4_t result;
  __asm__ ("subhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsubhn_u64 (uint64x2_t a, uint64x2_t b)
{
  uint32x2_t result;
  __asm__ ("subhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Transpose, primary (TRN1): interleave the even-numbered lanes of A
   with the even-numbered lanes of B.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn1_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn1_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Transpose, primary (TRN1), remaining 64-bit and 128-bit variants:
   interleave the even-numbered lanes of A and B.  */

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn1_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn1_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn1_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn1_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn1_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn1_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn1_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn1q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn1q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn1q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn1q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Transpose, primary (TRN1), remaining 128-bit variants, followed by
   transpose, secondary (TRN2): TRN2 interleaves the odd-numbered lanes
   of A and B.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn1q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn1q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn2_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn2_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn2_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn2_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn2_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn2_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn2_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Transpose, secondary (TRN2), remaining variants: interleave the
   odd-numbered lanes of A and B.  */

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn2_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn2_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn2q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn2q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn2q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn2q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn2q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn2q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Last TRN2 variant, then test-bits and unzip-primary families.  */

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Test bits (CMTST): each result lane is all ones when
   (A[i] & B[i]) != 0, otherwise zero; polynomial inputs produce an
   unsigned mask result.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_p8 (poly8x8_t a, poly8x8_t b)
{
  uint8x8_t result;
  __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_p16 (poly16x4_t a, poly16x4_t b)
{
  uint16x4_t result;
  __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_p8 (poly8x16_t a, poly8x16_t b)
{
  uint8x16_t result;
  __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_p16 (poly16x8_t a, poly16x8_t b)
{
  uint16x8_t result;
  __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Unzip, primary (UZP1): concatenate the even-numbered lanes of A with
   the even-numbered lanes of B.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp1_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp1_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp1_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp1_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp1_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp1_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp1_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp1_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
__inline uint16x4_t __attribute__ ((__always_inline__)) 13027 vuzp1_u16 (uint16x4_t a, uint16x4_t b) 13028 { 13029 uint16x4_t result; 13030 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" 13031 : "=w"(result) 13032 : "w"(a), "w"(b) 13033 : /* No clobbers */); 13034 return result; 13035 } 13036 13037 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 13038 vuzp1_u32 (uint32x2_t a, uint32x2_t b) 13039 { 13040 uint32x2_t result; 13041 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" 13042 : "=w"(result) 13043 : "w"(a), "w"(b) 13044 : /* No clobbers */); 13045 return result; 13046 } 13047 13048 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 13049 vuzp1q_f32 (float32x4_t a, float32x4_t b) 13050 { 13051 float32x4_t result; 13052 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" 13053 : "=w"(result) 13054 : "w"(a), "w"(b) 13055 : /* No clobbers */); 13056 return result; 13057 } 13058 13059 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 13060 vuzp1q_f64 (float64x2_t a, float64x2_t b) 13061 { 13062 float64x2_t result; 13063 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" 13064 : "=w"(result) 13065 : "w"(a), "w"(b) 13066 : /* No clobbers */); 13067 return result; 13068 } 13069 13070 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) 13071 vuzp1q_p8 (poly8x16_t a, poly8x16_t b) 13072 { 13073 poly8x16_t result; 13074 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" 13075 : "=w"(result) 13076 : "w"(a), "w"(b) 13077 : /* No clobbers */); 13078 return result; 13079 } 13080 13081 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 13082 vuzp1q_p16 (poly16x8_t a, poly16x8_t b) 13083 { 13084 poly16x8_t result; 13085 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" 13086 : "=w"(result) 13087 : "w"(a), "w"(b) 13088 : /* No clobbers */); 13089 return result; 13090 } 13091 13092 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 13093 vuzp1q_s8 (int8x16_t a, int8x16_t b) 13094 { 13095 int8x16_t result; 13096 
__asm__ ("uzp1 %0.16b,%1.16b,%2.16b" 13097 : "=w"(result) 13098 : "w"(a), "w"(b) 13099 : /* No clobbers */); 13100 return result; 13101 } 13102 13103 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 13104 vuzp1q_s16 (int16x8_t a, int16x8_t b) 13105 { 13106 int16x8_t result; 13107 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" 13108 : "=w"(result) 13109 : "w"(a), "w"(b) 13110 : /* No clobbers */); 13111 return result; 13112 } 13113 13114 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 13115 vuzp1q_s32 (int32x4_t a, int32x4_t b) 13116 { 13117 int32x4_t result; 13118 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" 13119 : "=w"(result) 13120 : "w"(a), "w"(b) 13121 : /* No clobbers */); 13122 return result; 13123 } 13124 13125 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 13126 vuzp1q_s64 (int64x2_t a, int64x2_t b) 13127 { 13128 int64x2_t result; 13129 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" 13130 : "=w"(result) 13131 : "w"(a), "w"(b) 13132 : /* No clobbers */); 13133 return result; 13134 } 13135 13136 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 13137 vuzp1q_u8 (uint8x16_t a, uint8x16_t b) 13138 { 13139 uint8x16_t result; 13140 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" 13141 : "=w"(result) 13142 : "w"(a), "w"(b) 13143 : /* No clobbers */); 13144 return result; 13145 } 13146 13147 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 13148 vuzp1q_u16 (uint16x8_t a, uint16x8_t b) 13149 { 13150 uint16x8_t result; 13151 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" 13152 : "=w"(result) 13153 : "w"(a), "w"(b) 13154 : /* No clobbers */); 13155 return result; 13156 } 13157 13158 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 13159 vuzp1q_u32 (uint32x4_t a, uint32x4_t b) 13160 { 13161 uint32x4_t result; 13162 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" 13163 : "=w"(result) 13164 : "w"(a), "w"(b) 13165 : /* No clobbers */); 13166 return result; 13167 } 13168 13169 
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* UZP2 (un-zip, secondary): from the concatenation of the two inputs,
   keep the odd-indexed elements.  Same inline-asm shape as the uzp1
   wrappers above: SIMD-register operands, no clobbers.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp2_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp2_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp2_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp2_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp2_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp2_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp2_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp2_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp2_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* 128-bit (q) variants of uzp2.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp2q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp2q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp2q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp2q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp2q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp2q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* ZIP1: interleave the elements of the low halves of the two inputs.
   Same inline-asm pattern as the uzp wrappers above.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip1_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip1_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip1_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip1_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip1_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip1_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip1_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip1_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip1_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* 128-bit (q) variants of zip1.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip1q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip1q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip1q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip1q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip1q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip1q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip1q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip1q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip1q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip1q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip1q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* ZIP2: interleave the elements of the high halves of the two inputs.
   Same inline-asm pattern as the zip1 wrappers above.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip2_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip2_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip2_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip2_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip2_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip2_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip2_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip2_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip2_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* 128-bit (q) variants of zip2.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip2q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip2q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip2q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip2q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip2q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip2q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip2q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip2q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip2q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip2q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip2q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip2q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */

/* Create struct element types for duplicating loads.
13878 13879 Create 2 element structures of: 13880 13881 +------+----+----+----+----+ 13882 | | 8 | 16 | 32 | 64 | 13883 +------+----+----+----+----+ 13884 |int | Y | Y | N | N | 13885 +------+----+----+----+----+ 13886 |uint | Y | Y | N | N | 13887 +------+----+----+----+----+ 13888 |float | - | - | N | N | 13889 +------+----+----+----+----+ 13890 |poly | Y | Y | - | - | 13891 +------+----+----+----+----+ 13892 13893 Create 3 element structures of: 13894 13895 +------+----+----+----+----+ 13896 | | 8 | 16 | 32 | 64 | 13897 +------+----+----+----+----+ 13898 |int | Y | Y | Y | Y | 13899 +------+----+----+----+----+ 13900 |uint | Y | Y | Y | Y | 13901 +------+----+----+----+----+ 13902 |float | - | - | Y | Y | 13903 +------+----+----+----+----+ 13904 |poly | Y | Y | - | - | 13905 +------+----+----+----+----+ 13906 13907 Create 4 element structures of: 13908 13909 +------+----+----+----+----+ 13910 | | 8 | 16 | 32 | 64 | 13911 +------+----+----+----+----+ 13912 |int | Y | N | N | Y | 13913 +------+----+----+----+----+ 13914 |uint | Y | N | N | Y | 13915 +------+----+----+----+----+ 13916 |float | - | - | N | Y | 13917 +------+----+----+----+----+ 13918 |poly | Y | N | - | - | 13919 +------+----+----+----+----+ 13920 13921 This is required for casting memory reference. */ 13922 #define __STRUCTN(t, sz, nelem) \ 13923 typedef struct t ## sz ## x ## nelem ## _t { \ 13924 t ## sz ## _t val[nelem]; \ 13925 } t ## sz ## x ## nelem ## _t; 13926 13927 /* 2-element structs. */ 13928 __STRUCTN (int, 8, 2) 13929 __STRUCTN (int, 16, 2) 13930 __STRUCTN (uint, 8, 2) 13931 __STRUCTN (uint, 16, 2) 13932 __STRUCTN (poly, 8, 2) 13933 __STRUCTN (poly, 16, 2) 13934 /* 3-element structs. 
*/ 13935 __STRUCTN (int, 8, 3) 13936 __STRUCTN (int, 16, 3) 13937 __STRUCTN (int, 32, 3) 13938 __STRUCTN (int, 64, 3) 13939 __STRUCTN (uint, 8, 3) 13940 __STRUCTN (uint, 16, 3) 13941 __STRUCTN (uint, 32, 3) 13942 __STRUCTN (uint, 64, 3) 13943 __STRUCTN (float, 32, 3) 13944 __STRUCTN (float, 64, 3) 13945 __STRUCTN (poly, 8, 3) 13946 __STRUCTN (poly, 16, 3) 13947 /* 4-element structs. */ 13948 __STRUCTN (int, 8, 4) 13949 __STRUCTN (int, 64, 4) 13950 __STRUCTN (uint, 8, 4) 13951 __STRUCTN (uint, 64, 4) 13952 __STRUCTN (poly, 8, 4) 13953 __STRUCTN (float, 64, 4) 13954 #undef __STRUCTN 13955 13956 #define __LD2R_FUNC(rettype, structtype, ptrtype, \ 13957 regsuffix, funcsuffix, Q) \ 13958 __extension__ static __inline rettype \ 13959 __attribute__ ((__always_inline__)) \ 13960 vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ 13961 { \ 13962 rettype result; \ 13963 __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ 13964 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \ 13965 : "=Q"(result) \ 13966 : "Q"(*(const structtype *)ptr) \ 13967 : "memory", "v16", "v17"); \ 13968 return result; \ 13969 } 13970 13971 __LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,) 13972 __LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,) 13973 __LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,) 13974 __LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,) 13975 __LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,) 13976 __LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,) 13977 __LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,) 13978 __LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,) 13979 __LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,) 13980 __LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,) 13981 __LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,) 13982 __LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,) 13983 __LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q) 13984 __LD2R_FUNC 
/* Argument tail of the preceding 64-bit __LD2R_FUNC expansion, then the
   128-bit (Q-register) vld2q_dup_* expansions.  Each expansion emits a
   "ld2r" load-and-replicate into the fixed scratch registers v16/v17 and
   spills them with "st1" into the returned aggregate.  */
(float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)

/* Expand to vld2{,q}_lane_<funcsuffix>: replace lane C of the 2-vector
   aggregate B with a 2-element structure loaded from PTR.  The aggregate
   is staged through the fixed scratch pair v16/v17 ("ld1"), the lane is
   overwritten in-place ("ld2 ...[%3]"), and the pair is stored back into
   the returned aggregate via the "=Q" memory output.  v16/v17 are
   clobbered; "memory" covers the read through *ptr.  C must be a
   compile-time constant ("i" constraint).  */
#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
				     rettype b, const int c) \
  { \
    rettype result; \
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
	     "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
	     : "memory", "v16", "v17"); \
    return result; \
  }

/* NOTE(review): the s8 variant below takes "uint8_t" as the pointee
   type, unlike the s8 variants of the other LD*_LANE macros which use
   int8_t — presumably intentional/historical; confirm against the ACLE
   signature of vld2_lane_s8 before changing.  */
__LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
__LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
__LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
__LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
__LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
__LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
__LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
__LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
__LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
__LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
__LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
__LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
__LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
__LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
__LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
__LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
__LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
__LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
__LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
__LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
__LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
__LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
__LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
__LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)

/* Expand to vld3{,q}_dup_<funcsuffix>: load a 3-element structure from
   PTR and replicate each element to all lanes ("ld3r"), staging through
   the fixed scratch registers v16-v18 and storing into the returned
   aggregate via the "=Q" output.  STRUCTTYPE sizes the memory input
   operand (3 scalar elements read from *ptr).  */
#define __LD3R_FUNC(rettype, structtype, ptrtype, \
		    regsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
  { \
    rettype result; \
    __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(*(const structtype *)ptr) \
	     : "memory", "v16", "v17", "v18"); \
    return result; \
  }

__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)

/* Expand to vld3{,q}_lane_<funcsuffix>: as __LD2_LANE_FUNC but for
   3-element structures, staged through v16-v18.  */
#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
				     rettype b, const int c) \
  { \
    rettype result; \
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
	     "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
	     : "memory", "v16", "v17", "v18"); \
    return result; \
  }

/* NOTE(review): s8 variant again uses uint8_t as pointee — see the
   matching note at __LD2_LANE_FUNC.  */
__LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
__LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
__LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
__LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
__LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
__LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
__LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
__LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
__LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
__LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
__LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
__LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
__LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
__LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
__LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
__LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
__LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
__LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
__LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
__LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
__LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
__LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
__LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
__LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)

/* Expand to vld4{,q}_dup_<funcsuffix>: as __LD3R_FUNC but for 4-element
   structures ("ld4r"), staged through v16-v19.  */
#define __LD4R_FUNC(rettype, structtype, ptrtype, \
		    regsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
  { \
    rettype result; \
    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(*(const structtype *)ptr) \
	     : "memory", "v16", "v17", "v18", "v19"); \
    return result; \
  }

__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
/* Expand to vld4{,q}_lane_<funcsuffix>: replace lane C of the 4-vector
   aggregate B with a 4-element structure loaded from PTR, staged through
   the fixed scratch registers v16-v19 (clobbered).  C must be a
   compile-time constant ("i" constraint).  */
#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
				     rettype b, const int c) \
  { \
    rettype result; \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
	     "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
	     : "memory", "v16", "v17", "v18", "v19"); \
    return result; \
  }

/* NOTE(review): s8 variant uses uint8_t as pointee — see the matching
   note at __LD2_LANE_FUNC.  */
__LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
__LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
__LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
__LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
__LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
__LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
__LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
__LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
__LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
__LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
__LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
__LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
__LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
__LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
__LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
__LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
__LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
__LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
__LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
__LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
__LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
__LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
__LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
__LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)

/* Expand to vst2{,q}_lane_<funcsuffix>: store lane C of the 2-vector
   aggregate B to PTR.  A per-instantiation struct of two scalar
   elements (__ST2_LANE_STRUCTURE_<intype>) types the "=Q" memory output
   so the compiler knows exactly which bytes are written — hence no
   "memory" clobber is needed.  v16/v17 are fixed scratch registers.  */
#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \
  __extension__ static __inline void \
  __attribute__ ((__always_inline__)) \
  vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
				     intype b, const int c) \
  { \
    __ST2_LANE_STRUCTURE_##intype *__p = \
      (__ST2_LANE_STRUCTURE_##intype *)ptr; \
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
	     "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \
	     : "=Q"(*__p) \
	     : "Q"(b), "i"(c) \
	     : "v16", "v17"); \
  }

__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
/* Remaining 128-bit vst2q_lane_* expansions.  */
__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)

/* Expand to vst3{,q}_lane_<funcsuffix>: as __ST2_LANE_FUNC but for
   3-element structures, staged through the fixed scratch registers
   v16-v18.  */
#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \
  __extension__ static __inline void \
  __attribute__ ((__always_inline__)) \
  vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
				     intype b, const int c) \
  { \
    __ST3_LANE_STRUCTURE_##intype *__p = \
      (__ST3_LANE_STRUCTURE_##intype *)ptr; \
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
	     "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
	     : "=Q"(*__p) \
	     : "Q"(b), "i"(c) \
	     : "v16", "v17", "v18"); \
  }

__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)

/* Expand to vst4{,q}_lane_<funcsuffix>: as __ST2_LANE_FUNC but for
   4-element structures, staged through the fixed scratch registers
   v16-v19.  */
#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \
  __extension__ static __inline void \
  __attribute__ ((__always_inline__)) \
  vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
				     intype b, const int c) \
  { \
    __ST4_LANE_STRUCTURE_##intype *__p = \
      (__ST4_LANE_STRUCTURE_##intype *)ptr; \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
	     "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
	     : "=Q"(*__p) \
	     : "Q"(b), "i"(c) \
	     : "v16", "v17", "v18", "v19"); \
  }

__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)

/* Signed add-long across vector: for a 2-lane vector a single pairwise
   add-long ("saddlp" .2s -> .1d) equals the across-lanes sum.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlv_s32 (int32x2_t a)
{
  int64_t result;
  __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

/* Unsigned counterpart of vaddlv_s32, via "uaddlp".  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddlv_u32 (uint32x2_t a)
{
  uint64_t result;
  __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

/* Pairwise add of the two lanes of __a, via the addp builtin.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpaddd_s64 (int64x2_t __a)
{
  return __builtin_aarch64_addpdi (__a);
}

/* Saturating doubling multiply-high by lane __c of the 128-bit vector
   __b ("laneq" variants); thin wrappers over the sqdmulh builtins.
   __c must be a constant lane index.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
}

/* Saturating rounding doubling multiply-high by lane; wrappers over the
   sqrdmulh builtins.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
}

/* Table intrinsics.  */

/* vqtbl1{,q}: table lookup into a single 128-bit table register; the
   "tbl" instruction takes the table and index operands directly in
   registers, so no scratch registers or memory are touched.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
{
  poly8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl1_s8 (int8x16_t a, uint8x8_t b)
{
  int8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
{
  poly8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
{
  int8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vqtbl2..vqtbl4{,q}: multi-register table lookup.  "tbl" requires the
   table in consecutive vector registers, so the aggregate TAB is read
   through a "Q" memory operand into the fixed block v16..v1N
   (clobbered) before the lookup.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}


__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}
/* Remaining 128-bit 4-register table lookups.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}


/* vqtbx1{,q}: table lookup extension — lanes whose index is out of
   range keep their value from R.  R is seeded into the result and tied
   read-write via the "+w" constraint so "tbx" can merge into it.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* vqtbx2: two-register table lookup extension; the table aggregate is
   staged from memory into the fixed scratch pair v16/v17 (clobbered),
   then "tbx" merges into the tied result seeded from R.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

/* 128-bit two-register and 64-bit three-register table lookup
   extensions: table staged into fixed scratch registers (clobbered),
   "tbx" merges into the tied result seeded from R.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}


__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

/* Extended table-lookup-with-default (TBX) helpers over three 128-bit
   table registers.  Each loads the table via a memory operand and
   hard-clobbers v16-v18; lanes of IDX out of range leave the
   corresponding lane of R unchanged (TBX semantics).  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

/* Same as above but over four 128-bit table registers (v16-v19).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

/* V7 legacy table intrinsics.  */

/* vtbl1: the 64-bit table is widened to 128 bits by concatenating a
   zero half, so out-of-range indices read zero (V7 TBL semantics).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl1_s8 (int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

/* vtbl2: the two 64-bit halves form one 128-bit table register.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

/* vtbl3: three 64-bit halves repacked into two 128-bit registers, the
   fourth half zeroed; the pair is loaded through v16-v17.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

/* vtbl4: four 64-bit halves repacked into two 128-bit registers.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

/* vtbx2: TBX variant of vtbl2 — lanes with out-of-range indices keep
   the corresponding lane of R.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
	   : "+w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
	   : "+w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
	   : "+w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

/* vtbx4: TBX variant of vtbl4, table loaded through v16-v17.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

/* End of temporary inline asm.  */

/* Start of optimal implementations in approved order.  */

/* vabs: lane-wise absolute value.  Scalar 64-bit forms fall back to
   the generic fabs/llabs builtins.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vabs_f32 (float32x2_t __a)
{
  return __builtin_aarch64_absv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vabs_f64 (float64x1_t __a)
{
  return __builtin_fabs (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabs_s8 (int8x8_t __a)
{
  return __builtin_aarch64_absv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabs_s16 (int16x4_t __a)
{
  return __builtin_aarch64_absv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabs_s32 (int32x2_t __a)
{
  return __builtin_aarch64_absv2si (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vabs_s64 (int64x1_t __a)
{
  return __builtin_llabs (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabsq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_absv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabsq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_absv2df (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabsq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_absv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabsq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_absv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabsq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_absv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabsq_s64 (int64x2_t __a)
{
  return __builtin_aarch64_absv2di (__a);
}

/* vadd */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vaddd_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a + __b;
}

/* vaddv: reduce all lanes to their sum, then extract lane 0 of the
   reduction result.  Unsigned forms route through the signed-typed
   builtins via casts.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddv_s8 (int8x8_t __a)
{
  return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddv_s16 (int16x4_t __a)
{
  return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddv_s32 (int32x2_t __a)
{
  return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddv_u8 (uint8x8_t __a)
{
  return vget_lane_u8 ((uint8x8_t)
		       __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
		       0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddv_u16 (uint16x4_t __a)
{
  return vget_lane_u16 ((uint16x4_t)
			__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
			0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddv_u32 (uint32x2_t __a)
{
  return vget_lane_u32 ((uint32x2_t)
			__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
			0);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddvq_s8 (int8x16_t __a)
{
  return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
			0);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddvq_s16 (int16x8_t __a)
{
  return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddvq_s32 (int32x4_t __a)
{
  return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddvq_s64 (int64x2_t __a)
{
  return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddvq_u8 (uint8x16_t __a)
{
  return vgetq_lane_u8 ((uint8x16_t)
			__builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
			0);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddvq_u16 (uint16x8_t __a)
{
  return vgetq_lane_u16 ((uint16x8_t)
			 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
			 0);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddvq_u32 (uint32x4_t __a)
{
  return vgetq_lane_u32 ((uint32x4_t)
			 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
			 0);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddvq_u64 (uint64x2_t __a)
{
  return vgetq_lane_u64 ((uint64x2_t)
			 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
			 0);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddv_f32 (float32x2_t __a)
{
  float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
  return vget_lane_f32 (__t, 0);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddvq_f32 (float32x4_t __a)
{
  float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
  return vgetq_lane_f32 (__t, 0);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vaddvq_f64 (float64x2_t __a)
{
  float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
  return vgetq_lane_f64 (__t, 0);
}

/* vbsl: bitwise select — result takes bits from __b where the mask
   __a is set and from __c where it is clear.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
{
  return __builtin_aarch64_simd_bsldi_suss (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
{
  return __builtin_aarch64_simd_bsldi_uuuu (__a, __b, __c);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
}

#ifdef __ARM_FEATURE_CRYPTO

/* vaes: AES single-round helpers, only available when the crypto
   extension is enabled.  */

static __inline uint8x16_t
vaeseq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
}

static __inline uint8x16_t
vaesdq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
}

static __inline uint8x16_t
vaesmcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
}

static __inline uint8x16_t
vaesimcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
}

#endif

/* vcage: absolute compare greater-than-or-equal; all-ones on true,
   zero on false.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcages_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcage_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) >= vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcageq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) >= vabsq_f32 (__b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcaged_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcageq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) >= vabsq_f64 (__b);
}

/* vcagt: absolute compare strictly greater-than.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcagts_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcagt_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) > vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcagtq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) > vabsq_f32 (__b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcagtd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcagtq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) > vabsq_f64 (__b);
}

/* vcale: absolute compare less-than-or-equal.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcale_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) <= vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaleq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) <= vabsq_f32 (__b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaleq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) <= vabsq_f64 (__b);
}

/* vcalt: absolute compare strictly less-than.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcalt_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) < vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) < vabsq_f32 (__b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) < vabsq_f64 (__b);
}

/* vceq - vector.  Lane-wise equality: all-ones per lane on equal,
   zero otherwise.  64x1 forms are computed with scalar ternaries.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceq_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceq_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}

/* vceq - scalar.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vceqs_f32 (float32_t __a, float32_t __b)
{
  return __a == __b ? -1 : 0;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqd_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqd_f64 (float64_t __a, float64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

/* vceqz - vector.  Lane-wise compare against zero, implemented as
   vceq against an explicit zero vector.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_f64 (float64x1_t __a)
{
  return __a == 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_p8 (poly8x8_t __a)
{
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceqz_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_s64 (int64x1_t __a)
{
  return __a == 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_u8 (uint8x8_t __a)
{
  uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceqz_u16 (uint16x4_t __a)
{
  uint16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_u32 (uint32x2_t __a)
{
  uint32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_u64 (uint64x1_t __a)
{
  return __a == 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqzq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqzq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_p8 (poly8x16_t __a)
{
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0,
0, 0, 0, 0, 0, 0, 0}; 15981 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); 15982 } 15983 15984 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 15985 vceqzq_s16 (int16x8_t __a) 15986 { 15987 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 15988 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); 15989 } 15990 15991 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 15992 vceqzq_s32 (int32x4_t __a) 15993 { 15994 int32x4_t __b = {0, 0, 0, 0}; 15995 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); 15996 } 15997 15998 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 15999 vceqzq_s64 (int64x2_t __a) 16000 { 16001 int64x2_t __b = {0, 0}; 16002 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); 16003 } 16004 16005 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16006 vceqzq_u8 (uint8x16_t __a) 16007 { 16008 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 16009 0, 0, 0, 0, 0, 0, 0, 0}; 16010 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, 16011 (int8x16_t) __b); 16012 } 16013 16014 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 16015 vceqzq_u16 (uint16x8_t __a) 16016 { 16017 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16018 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, 16019 (int16x8_t) __b); 16020 } 16021 16022 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16023 vceqzq_u32 (uint32x4_t __a) 16024 { 16025 uint32x4_t __b = {0, 0, 0, 0}; 16026 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, 16027 (int32x4_t) __b); 16028 } 16029 16030 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16031 vceqzq_u64 (uint64x2_t __a) 16032 { 16033 uint64x2_t __b = {0, 0}; 16034 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, 16035 (int64x2_t) __b); 16036 } 16037 16038 /* vceqz - scalar. 
*/ 16039 16040 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 16041 vceqzs_f32 (float32_t __a) 16042 { 16043 return __a == 0.0f ? -1 : 0; 16044 } 16045 16046 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16047 vceqzd_s64 (int64x1_t __a) 16048 { 16049 return __a == 0 ? -1ll : 0ll; 16050 } 16051 16052 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16053 vceqzd_u64 (int64x1_t __a) 16054 { 16055 return __a == 0 ? -1ll : 0ll; 16056 } 16057 16058 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 16059 vceqzd_f64 (float64_t __a) 16060 { 16061 return __a == 0.0 ? -1ll : 0ll; 16062 } 16063 16064 /* vcge - vector. */ 16065 16066 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16067 vcge_f32 (float32x2_t __a, float32x2_t __b) 16068 { 16069 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); 16070 } 16071 16072 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16073 vcge_f64 (float64x1_t __a, float64x1_t __b) 16074 { 16075 return __a >= __b ? 
-1ll : 0ll; 16076 } 16077 16078 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16079 vcge_p8 (poly8x8_t __a, poly8x8_t __b) 16080 { 16081 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, 16082 (int8x8_t) __b); 16083 } 16084 16085 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16086 vcge_s8 (int8x8_t __a, int8x8_t __b) 16087 { 16088 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); 16089 } 16090 16091 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 16092 vcge_s16 (int16x4_t __a, int16x4_t __b) 16093 { 16094 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); 16095 } 16096 16097 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16098 vcge_s32 (int32x2_t __a, int32x2_t __b) 16099 { 16100 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); 16101 } 16102 16103 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16104 vcge_s64 (int64x1_t __a, int64x1_t __b) 16105 { 16106 return __a >= __b ? 
-1ll : 0ll; 16107 } 16108 16109 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16110 vcge_u8 (uint8x8_t __a, uint8x8_t __b) 16111 { 16112 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, 16113 (int8x8_t) __b); 16114 } 16115 16116 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 16117 vcge_u16 (uint16x4_t __a, uint16x4_t __b) 16118 { 16119 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, 16120 (int16x4_t) __b); 16121 } 16122 16123 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16124 vcge_u32 (uint32x2_t __a, uint32x2_t __b) 16125 { 16126 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, 16127 (int32x2_t) __b); 16128 } 16129 16130 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16131 vcge_u64 (uint64x1_t __a, uint64x1_t __b) 16132 { 16133 return __a >= __b ? -1ll : 0ll; 16134 } 16135 16136 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16137 vcgeq_f32 (float32x4_t __a, float32x4_t __b) 16138 { 16139 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); 16140 } 16141 16142 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16143 vcgeq_f64 (float64x2_t __a, float64x2_t __b) 16144 { 16145 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); 16146 } 16147 16148 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16149 vcgeq_p8 (poly8x16_t __a, poly8x16_t __b) 16150 { 16151 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, 16152 (int8x16_t) __b); 16153 } 16154 16155 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16156 vcgeq_s8 (int8x16_t __a, int8x16_t __b) 16157 { 16158 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); 16159 } 16160 16161 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 16162 vcgeq_s16 (int16x8_t __a, int16x8_t __b) 
{
  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
}

/* Unsigned >= uses the dedicated unsigned builtins (cmgeu*, CMHS);
   the casts only satisfy the builtins' signed parameter types and do
   not change the bits being compared.  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

/* vcge - scalar.  */

/* Scalar compare >=: all ones (-1) when __a >= __b, else all zeros.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcges_f32 (float32_t __a, float32_t __b)
{
  return __a >= __b ? -1 : 0;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcged_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a >= __b ?
-1ll : 0ll; 16219 } 16220 16221 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16222 vcged_u64 (uint64x1_t __a, uint64x1_t __b) 16223 { 16224 return __a >= __b ? -1ll : 0ll; 16225 } 16226 16227 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 16228 vcged_f64 (float64_t __a, float64_t __b) 16229 { 16230 return __a >= __b ? -1ll : 0ll; 16231 } 16232 16233 /* vcgez - vector. */ 16234 16235 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16236 vcgez_f32 (float32x2_t __a) 16237 { 16238 float32x2_t __b = {0.0f, 0.0f}; 16239 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); 16240 } 16241 16242 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16243 vcgez_f64 (float64x1_t __a) 16244 { 16245 return __a >= 0.0 ? -1ll : 0ll; 16246 } 16247 16248 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16249 vcgez_p8 (poly8x8_t __a) 16250 { 16251 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16252 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, 16253 (int8x8_t) __b); 16254 } 16255 16256 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16257 vcgez_s8 (int8x8_t __a) 16258 { 16259 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16260 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); 16261 } 16262 16263 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 16264 vcgez_s16 (int16x4_t __a) 16265 { 16266 int16x4_t __b = {0, 0, 0, 0}; 16267 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); 16268 } 16269 16270 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16271 vcgez_s32 (int32x2_t __a) 16272 { 16273 int32x2_t __b = {0, 0}; 16274 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); 16275 } 16276 16277 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16278 vcgez_s64 (int64x1_t __a) 16279 { 16280 return __a >= 0ll ? 
-1ll : 0ll; 16281 } 16282 16283 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16284 vcgez_u8 (uint8x8_t __a) 16285 { 16286 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16287 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, 16288 (int8x8_t) __b); 16289 } 16290 16291 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 16292 vcgez_u16 (uint16x4_t __a) 16293 { 16294 uint16x4_t __b = {0, 0, 0, 0}; 16295 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, 16296 (int16x4_t) __b); 16297 } 16298 16299 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16300 vcgez_u32 (uint32x2_t __a) 16301 { 16302 uint32x2_t __b = {0, 0}; 16303 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, 16304 (int32x2_t) __b); 16305 } 16306 16307 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16308 vcgez_u64 (uint64x1_t __a) 16309 { 16310 return __a >= 0ll ? -1ll : 0ll; 16311 } 16312 16313 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16314 vcgezq_f32 (float32x4_t __a) 16315 { 16316 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; 16317 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); 16318 } 16319 16320 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16321 vcgezq_f64 (float64x2_t __a) 16322 { 16323 float64x2_t __b = {0.0, 0.0}; 16324 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); 16325 } 16326 16327 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16328 vcgezq_p8 (poly8x16_t __a) 16329 { 16330 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 16331 0, 0, 0, 0, 0, 0, 0, 0}; 16332 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, 16333 (int8x16_t) __b); 16334 } 16335 16336 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16337 vcgezq_s8 (int8x16_t __a) 16338 { 16339 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 16340 
0, 0, 0, 0, 0, 0, 0, 0}; 16341 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); 16342 } 16343 16344 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 16345 vcgezq_s16 (int16x8_t __a) 16346 { 16347 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16348 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); 16349 } 16350 16351 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16352 vcgezq_s32 (int32x4_t __a) 16353 { 16354 int32x4_t __b = {0, 0, 0, 0}; 16355 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); 16356 } 16357 16358 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16359 vcgezq_s64 (int64x2_t __a) 16360 { 16361 int64x2_t __b = {0, 0}; 16362 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); 16363 } 16364 16365 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16366 vcgezq_u8 (uint8x16_t __a) 16367 { 16368 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 16369 0, 0, 0, 0, 0, 0, 0, 0}; 16370 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, 16371 (int8x16_t) __b); 16372 } 16373 16374 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 16375 vcgezq_u16 (uint16x8_t __a) 16376 { 16377 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16378 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, 16379 (int16x8_t) __b); 16380 } 16381 16382 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16383 vcgezq_u32 (uint32x4_t __a) 16384 { 16385 uint32x4_t __b = {0, 0, 0, 0}; 16386 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, 16387 (int32x4_t) __b); 16388 } 16389 16390 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16391 vcgezq_u64 (uint64x2_t __a) 16392 { 16393 uint64x2_t __b = {0, 0}; 16394 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, 16395 (int64x2_t) __b); 16396 } 16397 16398 /* vcgez - scalar. 
*/ 16399 16400 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 16401 vcgezs_f32 (float32_t __a) 16402 { 16403 return __a >= 0.0f ? -1 : 0; 16404 } 16405 16406 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16407 vcgezd_s64 (int64x1_t __a) 16408 { 16409 return __a >= 0 ? -1ll : 0ll; 16410 } 16411 16412 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16413 vcgezd_u64 (int64x1_t __a) 16414 { 16415 return __a >= 0 ? -1ll : 0ll; 16416 } 16417 16418 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 16419 vcgezd_f64 (float64_t __a) 16420 { 16421 return __a >= 0.0 ? -1ll : 0ll; 16422 } 16423 16424 /* vcgt - vector. */ 16425 16426 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16427 vcgt_f32 (float32x2_t __a, float32x2_t __b) 16428 { 16429 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); 16430 } 16431 16432 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16433 vcgt_f64 (float64x1_t __a, float64x1_t __b) 16434 { 16435 return __a > __b ? 
-1ll : 0ll; 16436 } 16437 16438 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16439 vcgt_p8 (poly8x8_t __a, poly8x8_t __b) 16440 { 16441 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, 16442 (int8x8_t) __b); 16443 } 16444 16445 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16446 vcgt_s8 (int8x8_t __a, int8x8_t __b) 16447 { 16448 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); 16449 } 16450 16451 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 16452 vcgt_s16 (int16x4_t __a, int16x4_t __b) 16453 { 16454 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); 16455 } 16456 16457 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16458 vcgt_s32 (int32x2_t __a, int32x2_t __b) 16459 { 16460 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); 16461 } 16462 16463 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16464 vcgt_s64 (int64x1_t __a, int64x1_t __b) 16465 { 16466 return __a > __b ? 
	 -1ll : 0ll;
}

/* Unsigned > uses the dedicated unsigned builtins (cmgtu*, CMHI);
   the casts only satisfy the builtins' signed parameter types.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

/* uint64x1_t is a plain integer typedef, so a C comparison suffices.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a > __b ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_s16 (int16x8_t __a, int16x8_t __b)
{ 16524 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); 16525 } 16526 16527 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16528 vcgtq_s32 (int32x4_t __a, int32x4_t __b) 16529 { 16530 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); 16531 } 16532 16533 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16534 vcgtq_s64 (int64x2_t __a, int64x2_t __b) 16535 { 16536 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); 16537 } 16538 16539 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16540 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) 16541 { 16542 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, 16543 (int8x16_t) __b); 16544 } 16545 16546 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 16547 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) 16548 { 16549 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, 16550 (int16x8_t) __b); 16551 } 16552 16553 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16554 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) 16555 { 16556 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, 16557 (int32x4_t) __b); 16558 } 16559 16560 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16561 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b) 16562 { 16563 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, 16564 (int64x2_t) __b); 16565 } 16566 16567 /* vcgt - scalar. */ 16568 16569 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 16570 vcgts_f32 (float32_t __a, float32_t __b) 16571 { 16572 return __a > __b ? -1 : 0; 16573 } 16574 16575 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16576 vcgtd_s64 (int64x1_t __a, int64x1_t __b) 16577 { 16578 return __a > __b ? 
-1ll : 0ll; 16579 } 16580 16581 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16582 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b) 16583 { 16584 return __a > __b ? -1ll : 0ll; 16585 } 16586 16587 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 16588 vcgtd_f64 (float64_t __a, float64_t __b) 16589 { 16590 return __a > __b ? -1ll : 0ll; 16591 } 16592 16593 /* vcgtz - vector. */ 16594 16595 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16596 vcgtz_f32 (float32x2_t __a) 16597 { 16598 float32x2_t __b = {0.0f, 0.0f}; 16599 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); 16600 } 16601 16602 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16603 vcgtz_f64 (float64x1_t __a) 16604 { 16605 return __a > 0.0 ? -1ll : 0ll; 16606 } 16607 16608 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16609 vcgtz_p8 (poly8x8_t __a) 16610 { 16611 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16612 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, 16613 (int8x8_t) __b); 16614 } 16615 16616 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16617 vcgtz_s8 (int8x8_t __a) 16618 { 16619 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16620 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); 16621 } 16622 16623 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 16624 vcgtz_s16 (int16x4_t __a) 16625 { 16626 int16x4_t __b = {0, 0, 0, 0}; 16627 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); 16628 } 16629 16630 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16631 vcgtz_s32 (int32x2_t __a) 16632 { 16633 int32x2_t __b = {0, 0}; 16634 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); 16635 } 16636 16637 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16638 vcgtz_s64 (int64x1_t __a) 16639 { 16640 return __a > 0ll ? 
-1ll : 0ll; 16641 } 16642 16643 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16644 vcgtz_u8 (uint8x8_t __a) 16645 { 16646 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16647 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, 16648 (int8x8_t) __b); 16649 } 16650 16651 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 16652 vcgtz_u16 (uint16x4_t __a) 16653 { 16654 uint16x4_t __b = {0, 0, 0, 0}; 16655 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, 16656 (int16x4_t) __b); 16657 } 16658 16659 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16660 vcgtz_u32 (uint32x2_t __a) 16661 { 16662 uint32x2_t __b = {0, 0}; 16663 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, 16664 (int32x2_t) __b); 16665 } 16666 16667 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16668 vcgtz_u64 (uint64x1_t __a) 16669 { 16670 return __a > 0ll ? -1ll : 0ll; 16671 } 16672 16673 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16674 vcgtzq_f32 (float32x4_t __a) 16675 { 16676 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; 16677 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); 16678 } 16679 16680 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16681 vcgtzq_f64 (float64x2_t __a) 16682 { 16683 float64x2_t __b = {0.0, 0.0}; 16684 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); 16685 } 16686 16687 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16688 vcgtzq_p8 (poly8x16_t __a) 16689 { 16690 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 16691 0, 0, 0, 0, 0, 0, 0, 0}; 16692 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, 16693 (int8x16_t) __b); 16694 } 16695 16696 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16697 vcgtzq_s8 (int8x16_t __a) 16698 { 16699 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 16700 
0, 0, 0, 0, 0, 0, 0, 0}; 16701 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); 16702 } 16703 16704 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 16705 vcgtzq_s16 (int16x8_t __a) 16706 { 16707 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16708 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); 16709 } 16710 16711 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16712 vcgtzq_s32 (int32x4_t __a) 16713 { 16714 int32x4_t __b = {0, 0, 0, 0}; 16715 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); 16716 } 16717 16718 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16719 vcgtzq_s64 (int64x2_t __a) 16720 { 16721 int64x2_t __b = {0, 0}; 16722 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); 16723 } 16724 16725 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16726 vcgtzq_u8 (uint8x16_t __a) 16727 { 16728 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 16729 0, 0, 0, 0, 0, 0, 0, 0}; 16730 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, 16731 (int8x16_t) __b); 16732 } 16733 16734 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 16735 vcgtzq_u16 (uint16x8_t __a) 16736 { 16737 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16738 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, 16739 (int16x8_t) __b); 16740 } 16741 16742 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16743 vcgtzq_u32 (uint32x4_t __a) 16744 { 16745 uint32x4_t __b = {0, 0, 0, 0}; 16746 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, 16747 (int32x4_t) __b); 16748 } 16749 16750 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16751 vcgtzq_u64 (uint64x2_t __a) 16752 { 16753 uint64x2_t __b = {0, 0}; 16754 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, 16755 (int64x2_t) __b); 16756 } 16757 16758 /* vcgtz - scalar. 
*/ 16759 16760 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 16761 vcgtzs_f32 (float32_t __a) 16762 { 16763 return __a > 0.0f ? -1 : 0; 16764 } 16765 16766 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16767 vcgtzd_s64 (int64x1_t __a) 16768 { 16769 return __a > 0 ? -1ll : 0ll; 16770 } 16771 16772 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16773 vcgtzd_u64 (int64x1_t __a) 16774 { 16775 return __a > 0 ? -1ll : 0ll; 16776 } 16777 16778 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 16779 vcgtzd_f64 (float64_t __a) 16780 { 16781 return __a > 0.0 ? -1ll : 0ll; 16782 } 16783 16784 /* vcle - vector. */ 16785 16786 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16787 vcle_f32 (float32x2_t __a, float32x2_t __b) 16788 { 16789 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a); 16790 } 16791 16792 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16793 vcle_f64 (float64x1_t __a, float64x1_t __b) 16794 { 16795 return __a <= __b ? 
-1ll : 0ll; 16796 } 16797 16798 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16799 vcle_p8 (poly8x8_t __a, poly8x8_t __b) 16800 { 16801 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b, 16802 (int8x8_t) __a); 16803 } 16804 16805 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16806 vcle_s8 (int8x8_t __a, int8x8_t __b) 16807 { 16808 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a); 16809 } 16810 16811 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 16812 vcle_s16 (int16x4_t __a, int16x4_t __b) 16813 { 16814 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a); 16815 } 16816 16817 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16818 vcle_s32 (int32x2_t __a, int32x2_t __b) 16819 { 16820 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a); 16821 } 16822 16823 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16824 vcle_s64 (int64x1_t __a, int64x1_t __b) 16825 { 16826 return __a <= __b ? 
-1ll : 0ll; 16827 } 16828 16829 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16830 vcle_u8 (uint8x8_t __a, uint8x8_t __b) 16831 { 16832 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b, 16833 (int8x8_t) __a); 16834 } 16835 16836 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 16837 vcle_u16 (uint16x4_t __a, uint16x4_t __b) 16838 { 16839 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b, 16840 (int16x4_t) __a); 16841 } 16842 16843 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16844 vcle_u32 (uint32x2_t __a, uint32x2_t __b) 16845 { 16846 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b, 16847 (int32x2_t) __a); 16848 } 16849 16850 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16851 vcle_u64 (uint64x1_t __a, uint64x1_t __b) 16852 { 16853 return __a <= __b ? -1ll : 0ll; 16854 } 16855 16856 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16857 vcleq_f32 (float32x4_t __a, float32x4_t __b) 16858 { 16859 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a); 16860 } 16861 16862 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16863 vcleq_f64 (float64x2_t __a, float64x2_t __b) 16864 { 16865 return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a); 16866 } 16867 16868 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16869 vcleq_p8 (poly8x16_t __a, poly8x16_t __b) 16870 { 16871 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b, 16872 (int8x16_t) __a); 16873 } 16874 16875 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16876 vcleq_s8 (int8x16_t __a, int8x16_t __b) 16877 { 16878 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a); 16879 } 16880 16881 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 16882 vcleq_s16 (int16x8_t __a, int16x8_t __b) 
16883 { 16884 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a); 16885 } 16886 16887 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16888 vcleq_s32 (int32x4_t __a, int32x4_t __b) 16889 { 16890 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a); 16891 } 16892 16893 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16894 vcleq_s64 (int64x2_t __a, int64x2_t __b) 16895 { 16896 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a); 16897 } 16898 16899 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 16900 vcleq_u8 (uint8x16_t __a, uint8x16_t __b) 16901 { 16902 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b, 16903 (int8x16_t) __a); 16904 } 16905 16906 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 16907 vcleq_u16 (uint16x8_t __a, uint16x8_t __b) 16908 { 16909 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b, 16910 (int16x8_t) __a); 16911 } 16912 16913 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 16914 vcleq_u32 (uint32x4_t __a, uint32x4_t __b) 16915 { 16916 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b, 16917 (int32x4_t) __a); 16918 } 16919 16920 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 16921 vcleq_u64 (uint64x2_t __a, uint64x2_t __b) 16922 { 16923 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b, 16924 (int64x2_t) __a); 16925 } 16926 16927 /* vcle - scalar. */ 16928 16929 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 16930 vcles_f32 (float32_t __a, float32_t __b) 16931 { 16932 return __a <= __b ? -1 : 0; 16933 } 16934 16935 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16936 vcled_s64 (int64x1_t __a, int64x1_t __b) 16937 { 16938 return __a <= __b ? 
-1ll : 0ll; 16939 } 16940 16941 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16942 vcled_u64 (uint64x1_t __a, uint64x1_t __b) 16943 { 16944 return __a <= __b ? -1ll : 0ll; 16945 } 16946 16947 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 16948 vcled_f64 (float64_t __a, float64_t __b) 16949 { 16950 return __a <= __b ? -1ll : 0ll; 16951 } 16952 16953 /* vclez - vector. */ 16954 16955 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16956 vclez_f32 (float32x2_t __a) 16957 { 16958 float32x2_t __b = {0.0f, 0.0f}; 16959 return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b); 16960 } 16961 16962 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16963 vclez_f64 (float64x1_t __a) 16964 { 16965 return __a <= 0.0 ? -1ll : 0ll; 16966 } 16967 16968 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16969 vclez_p8 (poly8x8_t __a) 16970 { 16971 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16972 return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a, 16973 (int8x8_t) __b); 16974 } 16975 16976 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 16977 vclez_s8 (int8x8_t __a) 16978 { 16979 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 16980 return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b); 16981 } 16982 16983 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 16984 vclez_s16 (int16x4_t __a) 16985 { 16986 int16x4_t __b = {0, 0, 0, 0}; 16987 return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b); 16988 } 16989 16990 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 16991 vclez_s32 (int32x2_t __a) 16992 { 16993 int32x2_t __b = {0, 0}; 16994 return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b); 16995 } 16996 16997 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 16998 vclez_s64 (int64x1_t __a) 16999 { 17000 return __a <= 0ll ? 
-1ll : 0ll; 17001 } 17002 17003 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17004 vclez_u64 (uint64x1_t __a) 17005 { 17006 return __a <= 0ll ? -1ll : 0ll; 17007 } 17008 17009 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 17010 vclezq_f32 (float32x4_t __a) 17011 { 17012 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; 17013 return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b); 17014 } 17015 17016 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 17017 vclezq_f64 (float64x2_t __a) 17018 { 17019 float64x2_t __b = {0.0, 0.0}; 17020 return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b); 17021 } 17022 17023 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 17024 vclezq_p8 (poly8x16_t __a) 17025 { 17026 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 17027 0, 0, 0, 0, 0, 0, 0, 0}; 17028 return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a, 17029 (int8x16_t) __b); 17030 } 17031 17032 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 17033 vclezq_s8 (int8x16_t __a) 17034 { 17035 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 17036 0, 0, 0, 0, 0, 0, 0, 0}; 17037 return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b); 17038 } 17039 17040 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 17041 vclezq_s16 (int16x8_t __a) 17042 { 17043 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 17044 return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b); 17045 } 17046 17047 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 17048 vclezq_s32 (int32x4_t __a) 17049 { 17050 int32x4_t __b = {0, 0, 0, 0}; 17051 return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b); 17052 } 17053 17054 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 17055 vclezq_s64 (int64x2_t __a) 17056 { 17057 int64x2_t __b = {0, 0}; 17058 return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, 
__b); 17059 } 17060 17061 /* vclez - scalar. */ 17062 17063 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 17064 vclezs_f32 (float32_t __a) 17065 { 17066 return __a <= 0.0f ? -1 : 0; 17067 } 17068 17069 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17070 vclezd_s64 (int64x1_t __a) 17071 { 17072 return __a <= 0 ? -1ll : 0ll; 17073 } 17074 17075 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17076 vclezd_u64 (int64x1_t __a) 17077 { 17078 return __a <= 0 ? -1ll : 0ll; 17079 } 17080 17081 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 17082 vclezd_f64 (float64_t __a) 17083 { 17084 return __a <= 0.0 ? -1ll : 0ll; 17085 } 17086 17087 /* vclt - vector. */ 17088 17089 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 17090 vclt_f32 (float32x2_t __a, float32x2_t __b) 17091 { 17092 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a); 17093 } 17094 17095 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17096 vclt_f64 (float64x1_t __a, float64x1_t __b) 17097 { 17098 return __a < __b ? 
-1ll : 0ll; 17099 } 17100 17101 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 17102 vclt_p8 (poly8x8_t __a, poly8x8_t __b) 17103 { 17104 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b, 17105 (int8x8_t) __a); 17106 } 17107 17108 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 17109 vclt_s8 (int8x8_t __a, int8x8_t __b) 17110 { 17111 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a); 17112 } 17113 17114 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 17115 vclt_s16 (int16x4_t __a, int16x4_t __b) 17116 { 17117 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a); 17118 } 17119 17120 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 17121 vclt_s32 (int32x2_t __a, int32x2_t __b) 17122 { 17123 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a); 17124 } 17125 17126 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17127 vclt_s64 (int64x1_t __a, int64x1_t __b) 17128 { 17129 return __a < __b ? 
-1ll : 0ll; 17130 } 17131 17132 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 17133 vclt_u8 (uint8x8_t __a, uint8x8_t __b) 17134 { 17135 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b, 17136 (int8x8_t) __a); 17137 } 17138 17139 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 17140 vclt_u16 (uint16x4_t __a, uint16x4_t __b) 17141 { 17142 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b, 17143 (int16x4_t) __a); 17144 } 17145 17146 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 17147 vclt_u32 (uint32x2_t __a, uint32x2_t __b) 17148 { 17149 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b, 17150 (int32x2_t) __a); 17151 } 17152 17153 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17154 vclt_u64 (uint64x1_t __a, uint64x1_t __b) 17155 { 17156 return __a < __b ? -1ll : 0ll; 17157 } 17158 17159 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 17160 vcltq_f32 (float32x4_t __a, float32x4_t __b) 17161 { 17162 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a); 17163 } 17164 17165 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 17166 vcltq_f64 (float64x2_t __a, float64x2_t __b) 17167 { 17168 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a); 17169 } 17170 17171 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 17172 vcltq_p8 (poly8x16_t __a, poly8x16_t __b) 17173 { 17174 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b, 17175 (int8x16_t) __a); 17176 } 17177 17178 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 17179 vcltq_s8 (int8x16_t __a, int8x16_t __b) 17180 { 17181 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a); 17182 } 17183 17184 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 17185 vcltq_s16 (int16x8_t __a, int16x8_t __b) 17186 
{ 17187 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a); 17188 } 17189 17190 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 17191 vcltq_s32 (int32x4_t __a, int32x4_t __b) 17192 { 17193 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a); 17194 } 17195 17196 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 17197 vcltq_s64 (int64x2_t __a, int64x2_t __b) 17198 { 17199 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a); 17200 } 17201 17202 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 17203 vcltq_u8 (uint8x16_t __a, uint8x16_t __b) 17204 { 17205 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b, 17206 (int8x16_t) __a); 17207 } 17208 17209 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 17210 vcltq_u16 (uint16x8_t __a, uint16x8_t __b) 17211 { 17212 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b, 17213 (int16x8_t) __a); 17214 } 17215 17216 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 17217 vcltq_u32 (uint32x4_t __a, uint32x4_t __b) 17218 { 17219 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b, 17220 (int32x4_t) __a); 17221 } 17222 17223 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 17224 vcltq_u64 (uint64x2_t __a, uint64x2_t __b) 17225 { 17226 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b, 17227 (int64x2_t) __a); 17228 } 17229 17230 /* vclt - scalar. */ 17231 17232 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 17233 vclts_f32 (float32_t __a, float32_t __b) 17234 { 17235 return __a < __b ? -1 : 0; 17236 } 17237 17238 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17239 vcltd_s64 (int64x1_t __a, int64x1_t __b) 17240 { 17241 return __a < __b ? 
-1ll : 0ll; 17242 } 17243 17244 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17245 vcltd_u64 (uint64x1_t __a, uint64x1_t __b) 17246 { 17247 return __a < __b ? -1ll : 0ll; 17248 } 17249 17250 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 17251 vcltd_f64 (float64_t __a, float64_t __b) 17252 { 17253 return __a < __b ? -1ll : 0ll; 17254 } 17255 17256 /* vcltz - vector. */ 17257 17258 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 17259 vcltz_f32 (float32x2_t __a) 17260 { 17261 float32x2_t __b = {0.0f, 0.0f}; 17262 return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b); 17263 } 17264 17265 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17266 vcltz_f64 (float64x1_t __a) 17267 { 17268 return __a < 0.0 ? -1ll : 0ll; 17269 } 17270 17271 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 17272 vcltz_p8 (poly8x8_t __a) 17273 { 17274 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 17275 return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a, 17276 (int8x8_t) __b); 17277 } 17278 17279 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 17280 vcltz_s8 (int8x8_t __a) 17281 { 17282 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 17283 return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b); 17284 } 17285 17286 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 17287 vcltz_s16 (int16x4_t __a) 17288 { 17289 int16x4_t __b = {0, 0, 0, 0}; 17290 return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b); 17291 } 17292 17293 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 17294 vcltz_s32 (int32x2_t __a) 17295 { 17296 int32x2_t __b = {0, 0}; 17297 return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b); 17298 } 17299 17300 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17301 vcltz_s64 (int64x1_t __a) 17302 { 17303 return __a < 0ll ? 
-1ll : 0ll; 17304 } 17305 17306 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 17307 vcltzq_f32 (float32x4_t __a) 17308 { 17309 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; 17310 return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b); 17311 } 17312 17313 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 17314 vcltzq_f64 (float64x2_t __a) 17315 { 17316 float64x2_t __b = {0.0, 0.0}; 17317 return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b); 17318 } 17319 17320 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 17321 vcltzq_p8 (poly8x16_t __a) 17322 { 17323 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 17324 0, 0, 0, 0, 0, 0, 0, 0}; 17325 return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a, 17326 (int8x16_t) __b); 17327 } 17328 17329 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 17330 vcltzq_s8 (int8x16_t __a) 17331 { 17332 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, 17333 0, 0, 0, 0, 0, 0, 0, 0}; 17334 return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b); 17335 } 17336 17337 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 17338 vcltzq_s16 (int16x8_t __a) 17339 { 17340 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; 17341 return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b); 17342 } 17343 17344 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 17345 vcltzq_s32 (int32x4_t __a) 17346 { 17347 int32x4_t __b = {0, 0, 0, 0}; 17348 return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b); 17349 } 17350 17351 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 17352 vcltzq_s64 (int64x2_t __a) 17353 { 17354 int64x2_t __b = {0, 0}; 17355 return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b); 17356 } 17357 17358 /* vcltz - scalar. 
*/ 17359 17360 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 17361 vcltzs_f32 (float32_t __a) 17362 { 17363 return __a < 0.0f ? -1 : 0; 17364 } 17365 17366 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17367 vcltzd_s64 (int64x1_t __a) 17368 { 17369 return __a < 0 ? -1ll : 0ll; 17370 } 17371 17372 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 17373 vcltzd_u64 (int64x1_t __a) 17374 { 17375 return __a < 0 ? -1ll : 0ll; 17376 } 17377 17378 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 17379 vcltzd_f64 (float64_t __a) 17380 { 17381 return __a < 0.0 ? -1ll : 0ll; 17382 } 17383 17384 /* vclz. */ 17385 17386 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 17387 vclz_s8 (int8x8_t __a) 17388 { 17389 return __builtin_aarch64_clzv8qi (__a); 17390 } 17391 17392 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 17393 vclz_s16 (int16x4_t __a) 17394 { 17395 return __builtin_aarch64_clzv4hi (__a); 17396 } 17397 17398 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 17399 vclz_s32 (int32x2_t __a) 17400 { 17401 return __builtin_aarch64_clzv2si (__a); 17402 } 17403 17404 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 17405 vclz_u8 (uint8x8_t __a) 17406 { 17407 return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a); 17408 } 17409 17410 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 17411 vclz_u16 (uint16x4_t __a) 17412 { 17413 return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a); 17414 } 17415 17416 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 17417 vclz_u32 (uint32x2_t __a) 17418 { 17419 return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a); 17420 } 17421 17422 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 17423 vclzq_s8 (int8x16_t __a) 17424 { 
17425 return __builtin_aarch64_clzv16qi (__a); 17426 } 17427 17428 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 17429 vclzq_s16 (int16x8_t __a) 17430 { 17431 return __builtin_aarch64_clzv8hi (__a); 17432 } 17433 17434 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 17435 vclzq_s32 (int32x4_t __a) 17436 { 17437 return __builtin_aarch64_clzv4si (__a); 17438 } 17439 17440 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 17441 vclzq_u8 (uint8x16_t __a) 17442 { 17443 return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a); 17444 } 17445 17446 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 17447 vclzq_u16 (uint16x8_t __a) 17448 { 17449 return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a); 17450 } 17451 17452 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 17453 vclzq_u32 (uint32x4_t __a) 17454 { 17455 return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a); 17456 } 17457 17458 /* vcvt (double -> float). */ 17459 17460 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 17461 vcvt_f32_f64 (float64x2_t __a) 17462 { 17463 return __builtin_aarch64_float_truncate_lo_v2sf (__a); 17464 } 17465 17466 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 17467 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b) 17468 { 17469 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b); 17470 } 17471 17472 /* vcvt (float -> double). 
*/ 17473 17474 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 17475 vcvt_f64_f32 (float32x2_t __a) 17476 { 17477 17478 return __builtin_aarch64_float_extend_lo_v2df (__a); 17479 } 17480 17481 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 17482 vcvt_high_f64_f32 (float32x4_t __a) 17483 { 17484 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a); 17485 } 17486 17487 /* vcvt (<u>int -> float) */ 17488 17489 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 17490 vcvtd_f64_s64 (int64_t __a) 17491 { 17492 return (float64_t) __a; 17493 } 17494 17495 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 17496 vcvtd_f64_u64 (uint64_t __a) 17497 { 17498 return (float64_t) __a; 17499 } 17500 17501 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 17502 vcvts_f32_s32 (int32_t __a) 17503 { 17504 return (float32_t) __a; 17505 } 17506 17507 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 17508 vcvts_f32_u32 (uint32_t __a) 17509 { 17510 return (float32_t) __a; 17511 } 17512 17513 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 17514 vcvt_f32_s32 (int32x2_t __a) 17515 { 17516 return __builtin_aarch64_floatv2siv2sf (__a); 17517 } 17518 17519 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 17520 vcvt_f32_u32 (uint32x2_t __a) 17521 { 17522 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a); 17523 } 17524 17525 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 17526 vcvtq_f32_s32 (int32x4_t __a) 17527 { 17528 return __builtin_aarch64_floatv4siv4sf (__a); 17529 } 17530 17531 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 17532 vcvtq_f32_u32 (uint32x4_t __a) 17533 { 17534 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a); 17535 } 17536 17537 __extension__ static __inline float64x2_t 
__attribute__ ((__always_inline__)) 17538 vcvtq_f64_s64 (int64x2_t __a) 17539 { 17540 return __builtin_aarch64_floatv2div2df (__a); 17541 } 17542 17543 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 17544 vcvtq_f64_u64 (uint64x2_t __a) 17545 { 17546 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a); 17547 } 17548 17549 /* vcvt (float -> <u>int) */ 17550 17551 __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 17552 vcvtd_s64_f64 (float64_t __a) 17553 { 17554 return (int64_t) __a; 17555 } 17556 17557 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 17558 vcvtd_u64_f64 (float64_t __a) 17559 { 17560 return (uint64_t) __a; 17561 } 17562 17563 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 17564 vcvts_s32_f32 (float32_t __a) 17565 { 17566 return (int32_t) __a; 17567 } 17568 17569 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 17570 vcvts_u32_f32 (float32_t __a) 17571 { 17572 return (uint32_t) __a; 17573 } 17574 17575 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 17576 vcvt_s32_f32 (float32x2_t __a) 17577 { 17578 return __builtin_aarch64_lbtruncv2sfv2si (__a); 17579 } 17580 17581 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 17582 vcvt_u32_f32 (float32x2_t __a) 17583 { 17584 /* TODO: This cast should go away when builtins have 17585 their correct types. */ 17586 return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a); 17587 } 17588 17589 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 17590 vcvtq_s32_f32 (float32x4_t __a) 17591 { 17592 return __builtin_aarch64_lbtruncv4sfv4si (__a); 17593 } 17594 17595 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 17596 vcvtq_u32_f32 (float32x4_t __a) 17597 { 17598 /* TODO: This cast should go away when builtins have 17599 their correct types. 
*/ 17600 return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a); 17601 } 17602 17603 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 17604 vcvtq_s64_f64 (float64x2_t __a) 17605 { 17606 return __builtin_aarch64_lbtruncv2dfv2di (__a); 17607 } 17608 17609 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 17610 vcvtq_u64_f64 (float64x2_t __a) 17611 { 17612 /* TODO: This cast should go away when builtins have 17613 their correct types. */ 17614 return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a); 17615 } 17616 17617 /* vcvta */ 17618 17619 __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 17620 vcvtad_s64_f64 (float64_t __a) 17621 { 17622 return __builtin_aarch64_lrounddfdi (__a); 17623 } 17624 17625 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 17626 vcvtad_u64_f64 (float64_t __a) 17627 { 17628 return __builtin_aarch64_lroundudfdi (__a); 17629 } 17630 17631 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 17632 vcvtas_s32_f32 (float32_t __a) 17633 { 17634 return __builtin_aarch64_lroundsfsi (__a); 17635 } 17636 17637 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 17638 vcvtas_u32_f32 (float32_t __a) 17639 { 17640 return __builtin_aarch64_lroundusfsi (__a); 17641 } 17642 17643 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 17644 vcvta_s32_f32 (float32x2_t __a) 17645 { 17646 return __builtin_aarch64_lroundv2sfv2si (__a); 17647 } 17648 17649 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 17650 vcvta_u32_f32 (float32x2_t __a) 17651 { 17652 /* TODO: This cast should go away when builtins have 17653 their correct types. 
*/ 17654 return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a); 17655 } 17656 17657 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 17658 vcvtaq_s32_f32 (float32x4_t __a) 17659 { 17660 return __builtin_aarch64_lroundv4sfv4si (__a); 17661 } 17662 17663 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 17664 vcvtaq_u32_f32 (float32x4_t __a) 17665 { 17666 /* TODO: This cast should go away when builtins have 17667 their correct types. */ 17668 return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a); 17669 } 17670 17671 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 17672 vcvtaq_s64_f64 (float64x2_t __a) 17673 { 17674 return __builtin_aarch64_lroundv2dfv2di (__a); 17675 } 17676 17677 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 17678 vcvtaq_u64_f64 (float64x2_t __a) 17679 { 17680 /* TODO: This cast should go away when builtins have 17681 their correct types. */ 17682 return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a); 17683 } 17684 17685 /* vcvtm */ 17686 17687 __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 17688 vcvtmd_s64_f64 (float64_t __a) 17689 { 17690 return __builtin_llfloor (__a); 17691 } 17692 17693 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 17694 vcvtmd_u64_f64 (float64_t __a) 17695 { 17696 return __builtin_aarch64_lfloorudfdi (__a); 17697 } 17698 17699 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 17700 vcvtms_s32_f32 (float32_t __a) 17701 { 17702 return __builtin_ifloorf (__a); 17703 } 17704 17705 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 17706 vcvtms_u32_f32 (float32_t __a) 17707 { 17708 return __builtin_aarch64_lfloorusfsi (__a); 17709 } 17710 17711 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 17712 vcvtm_s32_f32 (float32x2_t __a) 17713 { 17714 return 
/* Tail of the preceding vcvtm (round-toward-minus-infinity) conversion,
   whose opening lines precede this chunk.  */
__builtin_aarch64_lfloorv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtm_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtmq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfloorv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtmq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtmq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfloorv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtmq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
}

/* vcvtn -- float-to-integer conversion rounding to nearest
   (lfrintn builtins).  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtnd_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintndfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtnd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintnudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtns_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtns_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtn_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfrintnv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtn_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtnq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfrintnv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtnq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtnq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfrintnv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtnq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
}

/* vcvtp -- float-to-integer conversion rounding toward plus infinity
   (ceil / lceil builtins).  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtpd_s64_f64 (float64_t __a)
{
  return __builtin_llceil (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtpd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lceiludfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtps_s32_f32 (float32_t __a)
{
  return __builtin_iceilf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtps_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lceilusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtp_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lceilv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtp_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtpq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lceilv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtpq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtpq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lceilv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtpq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
}

/* vdup_n -- replicate a scalar across every lane of a 64-bit vector.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_n_f32 (float32_t __a)
{
  return (float32x2_t) {__a, __a};
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_n_f64 (float64_t __a)
{
  /* float64x1_t is a plain double (see the typedefs at the top of this
     file), so a one-lane "dup" is just the value itself.  */
  return __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_n_p8 (poly8_t __a)
{
  return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_n_p16 (poly16_t __a)
{
  return (poly16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_n_s8 (int8_t __a)
{
  return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_n_s16 (int16_t __a)
{
  return (int16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_n_s32 (int32_t __a)
{
  return (int32x2_t) {__a, __a};
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_n_s64 (int64_t __a)
{
  return __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_n_u8 (uint8_t __a)
{
  return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_n_u16 (uint16_t __a)
{
  return (uint16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_n_u32 (uint32_t __a)
{
  return (uint32x2_t) {__a, __a};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_n_u64 (uint64_t __a)
{
  return __a;
}

/* vdupq_n -- replicate a scalar across every lane of a 128-bit vector.
   NOTE(review): several of these take int32_t/uint32_t rather than the
   element type (cf. vdup_n_p8 above, which takes poly8_t) -- confirm
   against the ACLE specification before relying on the exact parameter
   type.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_n_f32 (float32_t __a)
{
  return (float32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_n_f64 (float64_t __a)
{
  return (float64x2_t) {__a, __a};
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_n_p8 (uint32_t __a)
{
  return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_n_p16 (uint32_t __a)
{
  return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_n_s8 (int32_t __a)
{
  return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		      __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_n_s16 (int32_t __a)
{
  return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_n_s32 (int32_t __a)
{
  return (int32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_n_s64 (int64_t __a)
{
  return (int64x2_t) {__a, __a};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_n_u8 (uint32_t __a)
{
  return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_n_u16 (uint32_t __a)
{
  return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_n_u32 (uint32_t __a)
{
  return (uint32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_n_u64 (uint64_t __a)
{
  return (uint64x2_t) {__a, __a};
}

/* vdup_lane -- replicate lane __b of a 64-bit vector across a 64-bit
   result, via the __aarch64_vdup_lane_* helper macros.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_u64 (__a, __b);
}

/* vdup_laneq -- replicate lane __b of a 128-bit vector across a 64-bit
   result, via the __aarch64_vdup_laneq_* helper macros.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u64 (__a, __b);
}

/* vdupq_lane -- replicate lane __b of a 64-bit vector across a 128-bit
   result.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u64 (__a, __b);
}

/* vdupq_laneq -- replicate lane __b of a 128-bit vector across a 128-bit
   result.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u64 (__a, __b);
}

/* vdupb_lane -- extract byte lane __b to a scalar (lane get).  */
__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vdupb_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_p8 (__a, __b);
}

18341 __extension__ static __inline int8_t __attribute__ ((__always_inline__)) 18342 vdupb_lane_s8 (int8x8_t __a, const int __b) 18343 { 18344 return __aarch64_vget_lane_s8 (__a, __b); 18345 } 18346 18347 __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) 18348 vdupb_lane_u8 (uint8x8_t __a, const int __b) 18349 { 18350 return __aarch64_vget_lane_u8 (__a, __b); 18351 } 18352 18353 /* vduph_lane */ 18354 __extension__ static __inline poly16_t __attribute__ ((__always_inline__)) 18355 vduph_lane_p16 (poly16x4_t __a, const int __b) 18356 { 18357 return __aarch64_vget_lane_p16 (__a, __b); 18358 } 18359 18360 __extension__ static __inline int16_t __attribute__ ((__always_inline__)) 18361 vduph_lane_s16 (int16x4_t __a, const int __b) 18362 { 18363 return __aarch64_vget_lane_s16 (__a, __b); 18364 } 18365 18366 __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) 18367 vduph_lane_u16 (uint16x4_t __a, const int __b) 18368 { 18369 return __aarch64_vget_lane_u16 (__a, __b); 18370 } 18371 18372 /* vdups_lane */ 18373 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 18374 vdups_lane_f32 (float32x2_t __a, const int __b) 18375 { 18376 return __aarch64_vget_lane_f32 (__a, __b); 18377 } 18378 18379 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 18380 vdups_lane_s32 (int32x2_t __a, const int __b) 18381 { 18382 return __aarch64_vget_lane_s32 (__a, __b); 18383 } 18384 18385 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 18386 vdups_lane_u32 (uint32x2_t __a, const int __b) 18387 { 18388 return __aarch64_vget_lane_u32 (__a, __b); 18389 } 18390 18391 /* vdupd_lane */ 18392 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 18393 vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b) 18394 { 18395 return __a; 18396 } 18397 18398 __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 18399 vdupd_lane_s64 
(int64x1_t __a, const int __attribute__ ((unused)) __b) 18400 { 18401 return __a; 18402 } 18403 18404 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 18405 vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b) 18406 { 18407 return __a; 18408 } 18409 18410 /* vdupb_laneq */ 18411 __extension__ static __inline poly8_t __attribute__ ((__always_inline__)) 18412 vdupb_laneq_p8 (poly8x16_t __a, const int __b) 18413 { 18414 return __aarch64_vgetq_lane_p8 (__a, __b); 18415 } 18416 18417 __extension__ static __inline int8_t __attribute__ ((__always_inline__)) 18418 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b) 18419 { 18420 return __aarch64_vgetq_lane_s8 (__a, __b); 18421 } 18422 18423 __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) 18424 vdupb_laneq_u8 (uint8x16_t __a, const int __b) 18425 { 18426 return __aarch64_vgetq_lane_u8 (__a, __b); 18427 } 18428 18429 /* vduph_laneq */ 18430 __extension__ static __inline poly16_t __attribute__ ((__always_inline__)) 18431 vduph_laneq_p16 (poly16x8_t __a, const int __b) 18432 { 18433 return __aarch64_vgetq_lane_p16 (__a, __b); 18434 } 18435 18436 __extension__ static __inline int16_t __attribute__ ((__always_inline__)) 18437 vduph_laneq_s16 (int16x8_t __a, const int __b) 18438 { 18439 return __aarch64_vgetq_lane_s16 (__a, __b); 18440 } 18441 18442 __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) 18443 vduph_laneq_u16 (uint16x8_t __a, const int __b) 18444 { 18445 return __aarch64_vgetq_lane_u16 (__a, __b); 18446 } 18447 18448 /* vdups_laneq */ 18449 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 18450 vdups_laneq_f32 (float32x4_t __a, const int __b) 18451 { 18452 return __aarch64_vgetq_lane_f32 (__a, __b); 18453 } 18454 18455 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 18456 vdups_laneq_s32 (int32x4_t __a, const int __b) 18457 { 18458 return 
__aarch64_vgetq_lane_s32 (__a, __b); 18459 } 18460 18461 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 18462 vdups_laneq_u32 (uint32x4_t __a, const int __b) 18463 { 18464 return __aarch64_vgetq_lane_u32 (__a, __b); 18465 } 18466 18467 /* vdupd_laneq */ 18468 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 18469 vdupd_laneq_f64 (float64x2_t __a, const int __b) 18470 { 18471 return __aarch64_vgetq_lane_f64 (__a, __b); 18472 } 18473 18474 __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 18475 vdupd_laneq_s64 (int64x2_t __a, const int __b) 18476 { 18477 return __aarch64_vgetq_lane_s64 (__a, __b); 18478 } 18479 18480 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 18481 vdupd_laneq_u64 (uint64x2_t __a, const int __b) 18482 { 18483 return __aarch64_vgetq_lane_u64 (__a, __b); 18484 } 18485 18486 /* vfma_lane */ 18487 18488 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 18489 vfma_lane_f32 (float32x2_t __a, float32x2_t __b, 18490 float32x2_t __c, const int __lane) 18491 { 18492 return __builtin_aarch64_fmav2sf (__b, 18493 __aarch64_vdup_lane_f32 (__c, __lane), 18494 __a); 18495 } 18496 18497 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 18498 vfma_lane_f64 (float64_t __a, float64_t __b, 18499 float64_t __c, const int __lane) 18500 { 18501 return __builtin_fma (__b, __c, __a); 18502 } 18503 18504 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 18505 vfmad_lane_f64 (float64_t __a, float64_t __b, 18506 float64_t __c, const int __lane) 18507 { 18508 return __builtin_fma (__b, __c, __a); 18509 } 18510 18511 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 18512 vfmas_lane_f32 (float32_t __a, float32_t __b, 18513 float32x2_t __c, const int __lane) 18514 { 18515 return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a); 18516 } 18517 18518 /* 
vfma_laneq */

/* Floating-point fused multiply-accumulate/subtract by a selected lane.
   The vfma* forms compute __a + __b * __c[__lane]; the vfms* forms negate
   __b to compute __a - __b * __c[__lane].  The scalar variants use
   __builtin_fma/__builtin_fmaf, i.e. a single-rounding x*y+z; the vector
   builtins are passed operands in the same (multiplicand, multiplier,
   addend) order.  "laneq" means the lane source __c is a 128-bit vector.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
                float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (__b,
                                    __aarch64_vdup_laneq_f32 (__c, __lane),
                                    __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfma_laneq_f64 (float64_t __a, float64_t __b,
                float64x2_t __c, const int __lane)
{
  return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

/* Scalar-result "d" form; body identical to vfma_laneq_f64 above.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmad_laneq_f64 (float64_t __a, float64_t __b,
                 float64x2_t __c, const int __lane)
{
  return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

/* Scalar-result "s" (float32) form.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmas_laneq_f32 (float32_t __a, float32_t __b,
                 float32x4_t __c, const int __lane)
{
  return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
}

/* vfmaq_lane */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
                float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (__b,
                                    __aarch64_vdupq_lane_f32 (__c, __lane),
                                    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
                float64_t __c, const int __lane)
{
  /* __lane is unused: a float64x1 "vector" is a plain double here (see the
     float64x1_t typedef), so __c is simply broadcast to both lanes.  */
  return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
}

/* vfmaq_laneq */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
                 float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (__b,
                                    __aarch64_vdupq_laneq_f32 (__c, __lane),
                                    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
                 float64x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (__b,
                                    __aarch64_vdupq_laneq_f64 (__c, __lane),
                                    __a);
}

/* vfms_lane */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
               float32x2_t __c, const int __lane)
{
  /* Multiply-subtract is expressed as fma with a negated multiplicand.  */
  return __builtin_aarch64_fmav2sf (-__b,
                                    __aarch64_vdup_lane_f32 (__c, __lane),
                                    __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfms_lane_f64 (float64_t __a, float64_t __b,
               float64_t __c, const int __lane)
{
  /* __lane is unused: __c is already a scalar double.  */
  return __builtin_fma (-__b, __c, __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmsd_lane_f64 (float64_t __a, float64_t __b,
                float64_t __c, const int __lane)
{
  /* __lane is unused: __c is already a scalar double.  */
  return __builtin_fma (-__b, __c, __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmss_lane_f32 (float32_t __a, float32_t __b,
                float32x2_t __c, const int __lane)
{
  return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
}

/* vfms_laneq */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
                float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (-__b,
                                    __aarch64_vdup_laneq_f32 (__c, __lane),
                                    __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfms_laneq_f64 (float64_t __a, float64_t __b,
                float64x2_t __c, const int __lane)
{
  return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmsd_laneq_f64 (float64_t __a, float64_t __b,
                 float64x2_t __c, const int __lane)
{
  return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmss_laneq_f32 (float32_t __a, float32_t __b,
                 float32x4_t __c, const int __lane)
{
  return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
}

/* vfmsq_lane */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
                float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (-__b,
                                    __aarch64_vdupq_lane_f32 (__c, __lane),
                                    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
                float64_t __c, const int __lane)
{
  /* __lane is unused: __c is a plain double, broadcast to both lanes.  */
  return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a);
}

/* vfmsq_laneq */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
                 float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (-__b,
                                    __aarch64_vdupq_laneq_f32 (__c, __lane),
                                    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
                 float64x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (-__b,
                                    __aarch64_vdupq_laneq_f64 (__c, __lane),
                                    __a);
}

/* vld1 */

__extension__ static __inline
float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t *a)
{
  return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
}

/* vld1/vld1q: load one 64-bit (d) or 128-bit (q) vector from memory.
   The 64-bit "x1" types (int64x1_t, uint64x1_t, float64x1_t) are plain
   scalar typedefs in this header, so those variants are a simple
   dereference.  Unsigned and poly variants reuse the signed-element
   builtins and cast the result vector.  */

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_f64 (const float64_t *a)
{
  return *a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_p8 (const poly8_t *a)
{
  return (poly8x8_t)
    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_p16 (const poly16_t *a)
{
  return (poly16x4_t)
    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_s8 (const int8_t *a)
{
  return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_s16 (const int16_t *a)
{
  return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_s32 (const int32_t *a)
{
  return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_s64 (const int64_t *a)
{
  return *a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_u8 (const uint8_t *a)
{
  return (uint8x8_t)
    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_u16 (const uint16_t *a)
{
  return (uint16x4_t)
    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_u32 (const uint32_t *a)
{
  return (uint32x2_t)
    __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_u64 (const uint64_t *a)
{
  return *a;
}

/* vld1q */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t *a)
{
  return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_f64 (const float64_t *a)
{
  return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_p8 (const poly8_t *a)
{
  return (poly8x16_t)
    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_p16 (const poly16_t *a)
{
  return (poly16x8_t)
    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_s8 (const int8_t *a)
{
  return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_s16 (const int16_t *a)
{
  return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_s32 (const int32_t *a)
{
  return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_s64 (const int64_t *a)
{
  return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_u8 (const uint8_t *a)
{
  return (uint8x16_t)
    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_u16 (const uint16_t *a)
{
  return (uint16x8_t)
    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_u32 (const uint32_t *a)
{
  return (uint32x4_t)
    __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_u64 (const uint64_t *a)
{
  return (uint64x2_t)
    __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
}

/* vldn */

/* vldN loads N-way interleaved structures.  Each ldN builtin returns an
   opaque multi-register value (__builtin_aarch64_simd_oi for 2 registers,
   _ci for 3, _xi for 4); the individual d- or q-registers are extracted
   with the get_dreg*/get_qreg* builtins into the val[] array of the
   returned NxM structure type.  */

__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_s64 (const int64_t * __a)
{
  int64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
vld2_u64 (const uint64_t * __a)
{
  uint64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const
__builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

/* vld2/vld2q: 2-way interleaved loads.  Pattern: call the ld2 builtin to
   fill an opaque 2-register value, then extract each register (cast to the
   element type's vector) into ret.val[0..1].  */

__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
vld2_f64 (const float64_t * __a)
{
  float64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
  return ret;
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vld2_s8 (const int8_t * __a)
{
  int8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vld2_p8 (const poly8_t * __a)
{
  poly8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vld2_s16 (const int16_t * __a)
{
  int16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vld2_p16 (const poly16_t * __a)
{
  poly16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vld2_s32 (const int32_t * __a)
{
  int32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}

__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vld2_u8 (const uint8_t * __a)
{
  uint8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vld2_u16 (const uint16_t * __a)
{
  uint16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vld2_u32 (const uint32_t * __a)
{
  uint32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}

__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
  float32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
  return ret;
}

/* 128-bit (q-register) 2-way interleaved loads.  */

__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vld2q_s8 (const int8_t * __a)
{
  int8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vld2q_p8 (const poly8_t * __a)
{
  poly8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vld2q_s16 (const int16_t * __a)
{
  int16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vld2q_p16 (const poly16_t * __a)
{
  poly16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vld2q_s32 (const int32_t * __a)
{
  int32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}

__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
vld2q_s64 (const int64_t * __a)
{
  int64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}

__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vld2q_u8 (const uint8_t * __a)
{
  uint8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vld2q_u16 (const uint16_t * __a)
{
  uint16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vld2q_u32 (const uint32_t * __a)
{
  uint32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}

__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
vld2q_u64 (const uint64_t * __a)
{
  uint64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}

__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
  float32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
  return ret;
}

__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
vld2q_f64 (const float64_t * __a)
{
  float64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
  return ret;
}

/* vld3: 3-way interleaved loads into an opaque 3-register (_ci) value.  */

__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_s64 (const int64_t * __a)
{
  int64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
vld3_u64 (const uint64_t * __a)
{
  uint64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
vld3_f64 (const float64_t * __a)
{
  float64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
  return ret;
}

__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
vld3_s8 (const int8_t * __a)
{
  int8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o =
__builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

/* 3-way interleaved loads, d-register variants: the ld3 builtin fills an
   opaque 3-register (_ci) value; each register is extracted (cast to the
   element type's vector) into ret.val[0..2].  */

__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
vld3_p8 (const poly8_t * __a)
{
  poly8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
vld3_s16 (const int16_t * __a)
{
  int16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
vld3_p16 (const poly16_t * __a)
{
  poly16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
vld3_s32 (const int32_t * __a)
{
  int32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}

__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
vld3_u8 (const uint8_t * __a)
{
  uint8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
vld3_u16 (const uint16_t * __a)
{
  uint16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
vld3_u32 (const uint32_t * __a)
{
  uint32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}

__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
  float32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
  return ret;
}

/* 128-bit (q-register) 3-way interleaved loads.  */

__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
vld3q_s8 (const int8_t * __a)
{
  int8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
vld3q_p8 (const poly8_t * __a)
{
  poly8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
vld3q_s16 (const int16_t * __a)
{
  int16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
vld3q_p16 (const poly16_t * __a)
{
  poly16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
vld3q_s32 (const int32_t * __a)
{
  int32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
vld3q_s64 (const int64_t * __a)
{
  int64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
vld3q_u8 (const uint8_t * __a)
{
  uint8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
vld3q_u16 (const uint16_t * __a)
{
  uint16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
vld3q_u32 (const uint32_t * __a)
{
  uint32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
vld3q_u64 (const uint64_t * __a)
{
  uint64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
  float32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
  return ret;
}

__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
vld3q_f64 (const float64_t * __a)
{
  float64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
  return ret;
}

/* vld4: 4-way interleaved loads into an opaque 4-register (_xi) value.  */

__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_s64 (const int64_t * __a)
{
  int64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
vld4_u64 (const uint64_t * __a)
{
  uint64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
vld4_f64 (const float64_t * __a)
{
  float64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
  ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
  return ret;
}

__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
vld4_s8 (const int8_t * __a)
{
  int8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
vld4_p8 (const poly8_t * __a)
{
  poly8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
vld4_s16 (const int16_t * __a)
{
  int16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
vld4_p16 (const poly16_t * __a)
{
  poly16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
vld4_s32 (const int32_t * __a)
{
  int32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
vld4_u8 (const uint8_t * __a)
{
  uint8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi
*) __a); 19513 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); 19514 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); 19515 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); 19516 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); 19517 return ret; 19518 } 19519 19520 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) 19521 vld4_u16 (const uint16_t * __a) 19522 { 19523 uint16x4x4_t ret; 19524 __builtin_aarch64_simd_xi __o; 19525 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); 19526 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); 19527 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); 19528 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); 19529 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); 19530 return ret; 19531 } 19532 19533 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) 19534 vld4_u32 (const uint32_t * __a) 19535 { 19536 uint32x2x4_t ret; 19537 __builtin_aarch64_simd_xi __o; 19538 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); 19539 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); 19540 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); 19541 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); 19542 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); 19543 return ret; 19544 } 19545 19546 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) 19547 vld4_f32 (const float32_t * __a) 19548 { 19549 float32x2x4_t ret; 19550 __builtin_aarch64_simd_xi __o; 19551 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a); 19552 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); 19553 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); 19554 ret.val[2] = 
(float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); 19555 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); 19556 return ret; 19557 } 19558 19559 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) 19560 vld4q_s8 (const int8_t * __a) 19561 { 19562 int8x16x4_t ret; 19563 __builtin_aarch64_simd_xi __o; 19564 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); 19565 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); 19566 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); 19567 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); 19568 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); 19569 return ret; 19570 } 19571 19572 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) 19573 vld4q_p8 (const poly8_t * __a) 19574 { 19575 poly8x16x4_t ret; 19576 __builtin_aarch64_simd_xi __o; 19577 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); 19578 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); 19579 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); 19580 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); 19581 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); 19582 return ret; 19583 } 19584 19585 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) 19586 vld4q_s16 (const int16_t * __a) 19587 { 19588 int16x8x4_t ret; 19589 __builtin_aarch64_simd_xi __o; 19590 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); 19591 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); 19592 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); 19593 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); 19594 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); 19595 return ret; 19596 } 19597 19598 __extension__ 
static __inline poly16x8x4_t __attribute__ ((__always_inline__)) 19599 vld4q_p16 (const poly16_t * __a) 19600 { 19601 poly16x8x4_t ret; 19602 __builtin_aarch64_simd_xi __o; 19603 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); 19604 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); 19605 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); 19606 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); 19607 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); 19608 return ret; 19609 } 19610 19611 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) 19612 vld4q_s32 (const int32_t * __a) 19613 { 19614 int32x4x4_t ret; 19615 __builtin_aarch64_simd_xi __o; 19616 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); 19617 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); 19618 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); 19619 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); 19620 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); 19621 return ret; 19622 } 19623 19624 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__)) 19625 vld4q_s64 (const int64_t * __a) 19626 { 19627 int64x2x4_t ret; 19628 __builtin_aarch64_simd_xi __o; 19629 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); 19630 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); 19631 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); 19632 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); 19633 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); 19634 return ret; 19635 } 19636 19637 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) 19638 vld4q_u8 (const uint8_t * __a) 19639 { 19640 uint8x16x4_t ret; 19641 __builtin_aarch64_simd_xi __o; 19642 __o = 
__builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); 19643 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); 19644 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); 19645 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); 19646 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); 19647 return ret; 19648 } 19649 19650 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) 19651 vld4q_u16 (const uint16_t * __a) 19652 { 19653 uint16x8x4_t ret; 19654 __builtin_aarch64_simd_xi __o; 19655 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); 19656 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); 19657 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); 19658 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); 19659 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); 19660 return ret; 19661 } 19662 19663 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) 19664 vld4q_u32 (const uint32_t * __a) 19665 { 19666 uint32x4x4_t ret; 19667 __builtin_aarch64_simd_xi __o; 19668 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); 19669 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); 19670 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); 19671 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); 19672 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); 19673 return ret; 19674 } 19675 19676 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__)) 19677 vld4q_u64 (const uint64_t * __a) 19678 { 19679 uint64x2x4_t ret; 19680 __builtin_aarch64_simd_xi __o; 19681 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); 19682 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); 19683 ret.val[1] = (uint64x2_t) 
__builtin_aarch64_get_qregxiv2di (__o, 1); 19684 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); 19685 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); 19686 return ret; 19687 } 19688 19689 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) 19690 vld4q_f32 (const float32_t * __a) 19691 { 19692 float32x4x4_t ret; 19693 __builtin_aarch64_simd_xi __o; 19694 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a); 19695 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); 19696 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); 19697 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); 19698 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); 19699 return ret; 19700 } 19701 19702 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__)) 19703 vld4q_f64 (const float64_t * __a) 19704 { 19705 float64x2x4_t ret; 19706 __builtin_aarch64_simd_xi __o; 19707 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a); 19708 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); 19709 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); 19710 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); 19711 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); 19712 return ret; 19713 } 19714 19715 /* vmax */ 19716 19717 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 19718 vmax_f32 (float32x2_t __a, float32x2_t __b) 19719 { 19720 return __builtin_aarch64_smax_nanv2sf (__a, __b); 19721 } 19722 19723 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 19724 vmax_s8 (int8x8_t __a, int8x8_t __b) 19725 { 19726 return __builtin_aarch64_smaxv8qi (__a, __b); 19727 } 19728 19729 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 19730 vmax_s16 (int16x4_t __a, 
int16x4_t __b) 19731 { 19732 return __builtin_aarch64_smaxv4hi (__a, __b); 19733 } 19734 19735 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 19736 vmax_s32 (int32x2_t __a, int32x2_t __b) 19737 { 19738 return __builtin_aarch64_smaxv2si (__a, __b); 19739 } 19740 19741 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 19742 vmax_u8 (uint8x8_t __a, uint8x8_t __b) 19743 { 19744 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a, 19745 (int8x8_t) __b); 19746 } 19747 19748 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 19749 vmax_u16 (uint16x4_t __a, uint16x4_t __b) 19750 { 19751 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a, 19752 (int16x4_t) __b); 19753 } 19754 19755 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 19756 vmax_u32 (uint32x2_t __a, uint32x2_t __b) 19757 { 19758 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a, 19759 (int32x2_t) __b); 19760 } 19761 19762 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 19763 vmaxq_f32 (float32x4_t __a, float32x4_t __b) 19764 { 19765 return __builtin_aarch64_smax_nanv4sf (__a, __b); 19766 } 19767 19768 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 19769 vmaxq_f64 (float64x2_t __a, float64x2_t __b) 19770 { 19771 return __builtin_aarch64_smax_nanv2df (__a, __b); 19772 } 19773 19774 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 19775 vmaxq_s8 (int8x16_t __a, int8x16_t __b) 19776 { 19777 return __builtin_aarch64_smaxv16qi (__a, __b); 19778 } 19779 19780 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 19781 vmaxq_s16 (int16x8_t __a, int16x8_t __b) 19782 { 19783 return __builtin_aarch64_smaxv8hi (__a, __b); 19784 } 19785 19786 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 19787 vmaxq_s32 (int32x4_t __a, int32x4_t __b) 19788 { 
19789 return __builtin_aarch64_smaxv4si (__a, __b); 19790 } 19791 19792 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 19793 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b) 19794 { 19795 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a, 19796 (int8x16_t) __b); 19797 } 19798 19799 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 19800 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b) 19801 { 19802 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a, 19803 (int16x8_t) __b); 19804 } 19805 19806 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 19807 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) 19808 { 19809 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a, 19810 (int32x4_t) __b); 19811 } 19812 19813 /* vmaxnm */ 19814 19815 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 19816 vmaxnm_f32 (float32x2_t __a, float32x2_t __b) 19817 { 19818 return __builtin_aarch64_smaxv2sf (__a, __b); 19819 } 19820 19821 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 19822 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) 19823 { 19824 return __builtin_aarch64_smaxv4sf (__a, __b); 19825 } 19826 19827 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 19828 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b) 19829 { 19830 return __builtin_aarch64_smaxv2df (__a, __b); 19831 } 19832 19833 /* vmaxv */ 19834 19835 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 19836 vmaxv_f32 (float32x2_t __a) 19837 { 19838 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a), 19839 0); 19840 } 19841 19842 __extension__ static __inline int8_t __attribute__ ((__always_inline__)) 19843 vmaxv_s8 (int8x8_t __a) 19844 { 19845 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0); 19846 } 19847 19848 __extension__ static __inline int16_t __attribute__ ((__always_inline__)) 
19849 vmaxv_s16 (int16x4_t __a) 19850 { 19851 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0); 19852 } 19853 19854 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 19855 vmaxv_s32 (int32x2_t __a) 19856 { 19857 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0); 19858 } 19859 19860 __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) 19861 vmaxv_u8 (uint8x8_t __a) 19862 { 19863 return vget_lane_u8 ((uint8x8_t) 19864 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), 19865 0); 19866 } 19867 19868 __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) 19869 vmaxv_u16 (uint16x4_t __a) 19870 { 19871 return vget_lane_u16 ((uint16x4_t) 19872 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), 19873 0); 19874 } 19875 19876 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 19877 vmaxv_u32 (uint32x2_t __a) 19878 { 19879 return vget_lane_u32 ((uint32x2_t) 19880 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), 19881 0); 19882 } 19883 19884 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 19885 vmaxvq_f32 (float32x4_t __a) 19886 { 19887 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), 19888 0); 19889 } 19890 19891 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 19892 vmaxvq_f64 (float64x2_t __a) 19893 { 19894 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), 19895 0); 19896 } 19897 19898 __extension__ static __inline int8_t __attribute__ ((__always_inline__)) 19899 vmaxvq_s8 (int8x16_t __a) 19900 { 19901 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0); 19902 } 19903 19904 __extension__ static __inline int16_t __attribute__ ((__always_inline__)) 19905 vmaxvq_s16 (int16x8_t __a) 19906 { 19907 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0); 19908 } 19909 19910 __extension__ static __inline int32_t __attribute__ 
((__always_inline__)) 19911 vmaxvq_s32 (int32x4_t __a) 19912 { 19913 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0); 19914 } 19915 19916 __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) 19917 vmaxvq_u8 (uint8x16_t __a) 19918 { 19919 return vgetq_lane_u8 ((uint8x16_t) 19920 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), 19921 0); 19922 } 19923 19924 __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) 19925 vmaxvq_u16 (uint16x8_t __a) 19926 { 19927 return vgetq_lane_u16 ((uint16x8_t) 19928 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), 19929 0); 19930 } 19931 19932 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 19933 vmaxvq_u32 (uint32x4_t __a) 19934 { 19935 return vgetq_lane_u32 ((uint32x4_t) 19936 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), 19937 0); 19938 } 19939 19940 /* vmaxnmv */ 19941 19942 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 19943 vmaxnmv_f32 (float32x2_t __a) 19944 { 19945 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), 19946 0); 19947 } 19948 19949 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 19950 vmaxnmvq_f32 (float32x4_t __a) 19951 { 19952 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0); 19953 } 19954 19955 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 19956 vmaxnmvq_f64 (float64x2_t __a) 19957 { 19958 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0); 19959 } 19960 19961 /* vmin */ 19962 19963 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 19964 vmin_f32 (float32x2_t __a, float32x2_t __b) 19965 { 19966 return __builtin_aarch64_smin_nanv2sf (__a, __b); 19967 } 19968 19969 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 19970 vmin_s8 (int8x8_t __a, int8x8_t __b) 19971 { 19972 return __builtin_aarch64_sminv8qi (__a, __b); 19973 } 
19974 19975 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 19976 vmin_s16 (int16x4_t __a, int16x4_t __b) 19977 { 19978 return __builtin_aarch64_sminv4hi (__a, __b); 19979 } 19980 19981 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 19982 vmin_s32 (int32x2_t __a, int32x2_t __b) 19983 { 19984 return __builtin_aarch64_sminv2si (__a, __b); 19985 } 19986 19987 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 19988 vmin_u8 (uint8x8_t __a, uint8x8_t __b) 19989 { 19990 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a, 19991 (int8x8_t) __b); 19992 } 19993 19994 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 19995 vmin_u16 (uint16x4_t __a, uint16x4_t __b) 19996 { 19997 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a, 19998 (int16x4_t) __b); 19999 } 20000 20001 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 20002 vmin_u32 (uint32x2_t __a, uint32x2_t __b) 20003 { 20004 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a, 20005 (int32x2_t) __b); 20006 } 20007 20008 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 20009 vminq_f32 (float32x4_t __a, float32x4_t __b) 20010 { 20011 return __builtin_aarch64_smin_nanv4sf (__a, __b); 20012 } 20013 20014 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 20015 vminq_f64 (float64x2_t __a, float64x2_t __b) 20016 { 20017 return __builtin_aarch64_smin_nanv2df (__a, __b); 20018 } 20019 20020 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 20021 vminq_s8 (int8x16_t __a, int8x16_t __b) 20022 { 20023 return __builtin_aarch64_sminv16qi (__a, __b); 20024 } 20025 20026 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 20027 vminq_s16 (int16x8_t __a, int16x8_t __b) 20028 { 20029 return __builtin_aarch64_sminv8hi (__a, __b); 20030 } 20031 20032 __extension__ 
static __inline int32x4_t __attribute__ ((__always_inline__)) 20033 vminq_s32 (int32x4_t __a, int32x4_t __b) 20034 { 20035 return __builtin_aarch64_sminv4si (__a, __b); 20036 } 20037 20038 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 20039 vminq_u8 (uint8x16_t __a, uint8x16_t __b) 20040 { 20041 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a, 20042 (int8x16_t) __b); 20043 } 20044 20045 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 20046 vminq_u16 (uint16x8_t __a, uint16x8_t __b) 20047 { 20048 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a, 20049 (int16x8_t) __b); 20050 } 20051 20052 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 20053 vminq_u32 (uint32x4_t __a, uint32x4_t __b) 20054 { 20055 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a, 20056 (int32x4_t) __b); 20057 } 20058 20059 /* vminnm */ 20060 20061 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 20062 vminnm_f32 (float32x2_t __a, float32x2_t __b) 20063 { 20064 return __builtin_aarch64_sminv2sf (__a, __b); 20065 } 20066 20067 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 20068 vminnmq_f32 (float32x4_t __a, float32x4_t __b) 20069 { 20070 return __builtin_aarch64_sminv4sf (__a, __b); 20071 } 20072 20073 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 20074 vminnmq_f64 (float64x2_t __a, float64x2_t __b) 20075 { 20076 return __builtin_aarch64_sminv2df (__a, __b); 20077 } 20078 20079 /* vminv */ 20080 20081 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 20082 vminv_f32 (float32x2_t __a) 20083 { 20084 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), 20085 0); 20086 } 20087 20088 __extension__ static __inline int8_t __attribute__ ((__always_inline__)) 20089 vminv_s8 (int8x8_t __a) 20090 { 20091 return vget_lane_s8 
(__builtin_aarch64_reduc_smin_v8qi (__a), 20092 0); 20093 } 20094 20095 __extension__ static __inline int16_t __attribute__ ((__always_inline__)) 20096 vminv_s16 (int16x4_t __a) 20097 { 20098 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0); 20099 } 20100 20101 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 20102 vminv_s32 (int32x2_t __a) 20103 { 20104 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0); 20105 } 20106 20107 __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) 20108 vminv_u8 (uint8x8_t __a) 20109 { 20110 return vget_lane_u8 ((uint8x8_t) 20111 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), 20112 0); 20113 } 20114 20115 __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) 20116 vminv_u16 (uint16x4_t __a) 20117 { 20118 return vget_lane_u16 ((uint16x4_t) 20119 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), 20120 0); 20121 } 20122 20123 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 20124 vminv_u32 (uint32x2_t __a) 20125 { 20126 return vget_lane_u32 ((uint32x2_t) 20127 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), 20128 0); 20129 } 20130 20131 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 20132 vminvq_f32 (float32x4_t __a) 20133 { 20134 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), 20135 0); 20136 } 20137 20138 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 20139 vminvq_f64 (float64x2_t __a) 20140 { 20141 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), 20142 0); 20143 } 20144 20145 __extension__ static __inline int8_t __attribute__ ((__always_inline__)) 20146 vminvq_s8 (int8x16_t __a) 20147 { 20148 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0); 20149 } 20150 20151 __extension__ static __inline int16_t __attribute__ ((__always_inline__)) 20152 vminvq_s16 (int16x8_t __a) 20153 { 20154 return 
vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0); 20155 } 20156 20157 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 20158 vminvq_s32 (int32x4_t __a) 20159 { 20160 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0); 20161 } 20162 20163 __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) 20164 vminvq_u8 (uint8x16_t __a) 20165 { 20166 return vgetq_lane_u8 ((uint8x16_t) 20167 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), 20168 0); 20169 } 20170 20171 __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) 20172 vminvq_u16 (uint16x8_t __a) 20173 { 20174 return vgetq_lane_u16 ((uint16x8_t) 20175 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), 20176 0); 20177 } 20178 20179 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 20180 vminvq_u32 (uint32x4_t __a) 20181 { 20182 return vgetq_lane_u32 ((uint32x4_t) 20183 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), 20184 0); 20185 } 20186 20187 /* vminnmv */ 20188 20189 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 20190 vminnmv_f32 (float32x2_t __a) 20191 { 20192 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0); 20193 } 20194 20195 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 20196 vminnmvq_f32 (float32x4_t __a) 20197 { 20198 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0); 20199 } 20200 20201 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 20202 vminnmvq_f64 (float64x2_t __a) 20203 { 20204 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0); 20205 } 20206 20207 /* vmla */ 20208 20209 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 20210 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c) 20211 { 20212 return a + b * c; 20213 } 20214 20215 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 20216 
vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) 20217 { 20218 return a + b * c; 20219 } 20220 20221 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 20222 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) 20223 { 20224 return a + b * c; 20225 } 20226 20227 /* vmla_lane */ 20228 20229 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 20230 vmla_lane_f32 (float32x2_t __a, float32x2_t __b, 20231 float32x2_t __c, const int __lane) 20232 { 20233 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane))); 20234 } 20235 20236 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 20237 vmla_lane_s16 (int16x4_t __a, int16x4_t __b, 20238 int16x4_t __c, const int __lane) 20239 { 20240 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane))); 20241 } 20242 20243 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 20244 vmla_lane_s32 (int32x2_t __a, int32x2_t __b, 20245 int32x2_t __c, const int __lane) 20246 { 20247 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane))); 20248 } 20249 20250 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 20251 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, 20252 uint16x4_t __c, const int __lane) 20253 { 20254 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane))); 20255 } 20256 20257 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 20258 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, 20259 uint32x2_t __c, const int __lane) 20260 { 20261 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane))); 20262 } 20263 20264 /* vmla_laneq */ 20265 20266 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 20267 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b, 20268 float32x4_t __c, const int __lane) 20269 { 20270 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); 20271 } 20272 20273 __extension__ static __inline int16x4_t 
__attribute__ ((__always_inline__)) 20274 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b, 20275 int16x8_t __c, const int __lane) 20276 { 20277 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); 20278 } 20279 20280 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 20281 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b, 20282 int32x4_t __c, const int __lane) 20283 { 20284 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); 20285 } 20286 20287 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 20288 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b, 20289 uint16x8_t __c, const int __lane) 20290 { 20291 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); 20292 } 20293 20294 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 20295 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b, 20296 uint32x4_t __c, const int __lane) 20297 { 20298 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); 20299 } 20300 20301 /* vmlaq_lane */ 20302 20303 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 20304 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, 20305 float32x2_t __c, const int __lane) 20306 { 20307 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane))); 20308 } 20309 20310 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 20311 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, 20312 int16x4_t __c, const int __lane) 20313 { 20314 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane))); 20315 } 20316 20317 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 20318 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, 20319 int32x2_t __c, const int __lane) 20320 { 20321 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane))); 20322 } 20323 20324 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 20325 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, 20326 uint16x4_t 
__c, const int __lane) 20327 { 20328 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane))); 20329 } 20330 20331 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 20332 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, 20333 uint32x2_t __c, const int __lane) 20334 { 20335 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane))); 20336 } 20337 20338 /* vmlaq_laneq */ 20339 20340 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 20341 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b, 20342 float32x4_t __c, const int __lane) 20343 { 20344 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); 20345 } 20346 20347 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 20348 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b, 20349 int16x8_t __c, const int __lane) 20350 { 20351 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); 20352 } 20353 20354 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 20355 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b, 20356 int32x4_t __c, const int __lane) 20357 { 20358 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); 20359 } 20360 20361 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 20362 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, 20363 uint16x8_t __c, const int __lane) 20364 { 20365 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); 20366 } 20367 20368 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 20369 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, 20370 uint32x4_t __c, const int __lane) 20371 { 20372 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); 20373 } 20374 20375 /* vmls */ 20376 20377 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 20378 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c) 20379 { 20380 return a - b * c; 20381 } 20382 20383 __extension__ static 
__inline float32x4_t __attribute__ ((__always_inline__)) 20384 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) 20385 { 20386 return a - b * c; 20387 } 20388 20389 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 20390 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) 20391 { 20392 return a - b * c; 20393 } 20394 20395 /* vmls_lane */ 20396 20397 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 20398 vmls_lane_f32 (float32x2_t __a, float32x2_t __b, 20399 float32x2_t __c, const int __lane) 20400 { 20401 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane))); 20402 } 20403 20404 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 20405 vmls_lane_s16 (int16x4_t __a, int16x4_t __b, 20406 int16x4_t __c, const int __lane) 20407 { 20408 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane))); 20409 } 20410 20411 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 20412 vmls_lane_s32 (int32x2_t __a, int32x2_t __b, 20413 int32x2_t __c, const int __lane) 20414 { 20415 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane))); 20416 } 20417 20418 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 20419 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, 20420 uint16x4_t __c, const int __lane) 20421 { 20422 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane))); 20423 } 20424 20425 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 20426 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, 20427 uint32x2_t __c, const int __lane) 20428 { 20429 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane))); 20430 } 20431 20432 /* vmls_laneq */ 20433 20434 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 20435 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b, 20436 float32x4_t __c, const int __lane) 20437 { 20438 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); 
20439 } 20440 20441 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 20442 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b, 20443 int16x8_t __c, const int __lane) 20444 { 20445 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); 20446 } 20447 20448 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 20449 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b, 20450 int32x4_t __c, const int __lane) 20451 { 20452 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); 20453 } 20454 20455 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 20456 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b, 20457 uint16x8_t __c, const int __lane) 20458 { 20459 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); 20460 } 20461 20462 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 20463 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b, 20464 uint32x4_t __c, const int __lane) 20465 { 20466 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); 20467 } 20468 20469 /* vmlsq_lane */ 20470 20471 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 20472 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, 20473 float32x2_t __c, const int __lane) 20474 { 20475 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane))); 20476 } 20477 20478 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 20479 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, 20480 int16x4_t __c, const int __lane) 20481 { 20482 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane))); 20483 } 20484 20485 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 20486 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, 20487 int32x2_t __c, const int __lane) 20488 { 20489 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane))); 20490 } 20491 20492 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 20493 
vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, 20494 uint16x4_t __c, const int __lane) 20495 { 20496 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane))); 20497 } 20498 20499 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 20500 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, 20501 uint32x2_t __c, const int __lane) 20502 { 20503 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane))); 20504 } 20505 20506 /* vmlsq_laneq */ 20507 20508 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 20509 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b, 20510 float32x4_t __c, const int __lane) 20511 { 20512 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); 20513 } 20514 20515 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 20516 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b, 20517 int16x8_t __c, const int __lane) 20518 { 20519 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); 20520 } 20521 20522 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 20523 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b, 20524 int32x4_t __c, const int __lane) 20525 { 20526 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); 20527 } 20528 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 20529 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, 20530 uint16x8_t __c, const int __lane) 20531 { 20532 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); 20533 } 20534 20535 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 20536 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, 20537 uint32x4_t __c, const int __lane) 20538 { 20539 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); 20540 } 20541 20542 /* vmov_n_ */ 20543 20544 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 20545 vmov_n_f32 (float32_t __a) 20546 { 20547 return vdup_n_f32 (__a); 20548 } 
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmov_n_f64 (float64_t __a)
{
  /* float64x1_t is a plain double typedef in this header, so the scalar
     is the one-lane vector itself.  */
  return __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmov_n_p8 (poly8_t __a)
{
  return vdup_n_p8 (__a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vmov_n_p16 (poly16_t __a)
{
  return vdup_n_p16 (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmov_n_s8 (int8_t __a)
{
  return vdup_n_s8 (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmov_n_s16 (int16_t __a)
{
  return vdup_n_s16 (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmov_n_s32 (int32_t __a)
{
  return vdup_n_s32 (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vmov_n_s64 (int64_t __a)
{
  /* int64x1_t is a scalar typedef; no duplication needed.  */
  return __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmov_n_u8 (uint8_t __a)
{
  return vdup_n_u8 (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmov_n_u16 (uint16_t __a)
{
  return vdup_n_u16 (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmov_n_u32 (uint32_t __a)
{
  return vdup_n_u32 (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vmov_n_u64 (uint64_t __a)
{
  /* uint64x1_t is a scalar typedef; no duplication needed.  */
  return __a;
}

/* vmovq_n_: broadcast a scalar into every lane of a 128-bit vector.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmovq_n_f32 (float32_t __a)
{
  return vdupq_n_f32 (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmovq_n_f64 (float64_t __a)
{
  return vdupq_n_f64 (__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmovq_n_p8 (poly8_t __a)
{
  return vdupq_n_p8 (__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmovq_n_p16 (poly16_t __a)
{
  return vdupq_n_p16 (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovq_n_s8 (int8_t __a)
{
  return vdupq_n_s8 (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovq_n_s16 (int16_t __a)
{
  return vdupq_n_s16 (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovq_n_s32 (int32_t __a)
{
  return vdupq_n_s32 (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovq_n_s64 (int64_t __a)
{
  return vdupq_n_s64 (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovq_n_u8 (uint8_t __a)
{
  return vdupq_n_u8 (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovq_n_u16 (uint16_t __a)
{
  return vdupq_n_u16 (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovq_n_u32 (uint32_t __a)
{
  return vdupq_n_u32 (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovq_n_u64 (uint64_t __a)
{
  return vdupq_n_u64 (__a);
}

/* vmul_lane: element-wise multiply of a 64-bit vector by one lane of a
   64-bit vector: __a * __b[__lane].  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
{
  /* A one-lane vector: the only valid __lane is 0, so the index is
     unused and the operation is a plain scalar multiply.  */
  return __a * __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u32 (__b, __lane);
}

/* vmul_laneq: element-wise multiply of a 64-bit vector by one lane of a
   128-bit vector.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
}

/* vmulq_lane: element-wise multiply of a 128-bit vector by one lane of a
   64-bit vector.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
{
  /* __b is a one-lane (scalar) vector, so the multiply broadcasts it;
     __lane can only legitimately be 0 and is unused.  */
  return __a * __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_u32 (__b, __lane);
}

/* vmulq_laneq: element-wise multiply of a 128-bit vector by one lane of a
   128-bit vector.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
{
  return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
}

/* vneg: element-wise negation, implemented with GCC's vector-extension
   unary minus.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vneg_f32 (float32x2_t __a)
{
  return -__a;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vneg_f64 (float64x1_t __a)
{
  return -__a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vneg_s8 (int8x8_t __a)
{
  return -__a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vneg_s16 (int16x4_t __a)
{
  return -__a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vneg_s32 (int32x2_t __a)
{
  return -__a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vneg_s64 (int64x1_t __a)
{
  return -__a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vnegq_f32 (float32x4_t __a)
{
  return -__a;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vnegq_f64 (float64x2_t __a)
{
  return -__a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vnegq_s8 (int8x16_t __a)
{
  return -__a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vnegq_s16 (int16x8_t __a)
{
  return -__a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vnegq_s32 (int32x4_t __a)
{
  return -__a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vnegq_s64 (int64x2_t __a)
{
  return -__a;
}

/* vqabs: saturating absolute value, via the AArch64 SQABS builtins
   (per ACLE, INT_MIN saturates to INT_MAX instead of overflowing).  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqabsq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqabsb_s8 (int8_t __a)
{
  return (int8_t) __builtin_aarch64_sqabsqi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqabsh_s16 (int16_t __a)
{
  return (int16_t) __builtin_aarch64_sqabshi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqabss_s32 (int32_t __a)
{
  return (int32_t) __builtin_aarch64_sqabssi (__a);
}

/* vqadd: scalar saturating add, via the AArch64 SQADD/UQADD builtins.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqaddb_s8 (int8_t __a, int8_t __b)
{
  return (int8_t) __builtin_aarch64_sqaddqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqaddh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqaddhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqadds_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqaddsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqaddd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqaddb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqaddqi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqaddh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqaddhi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqadds_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqaddsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
}

/* vqdmlal: saturating doubling multiply-accumulate long, via the AArch64
   SQDMLAL builtins.  "_high" variants use the upper half of the 128-bit
   sources; "_lane"/"_laneq" take the multiplier from a 64-/128-bit lane;
   "_n" broadcasts a scalar multiplier.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlals_s32 (int64x1_t __a, int32_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlals_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
}

/* vqdmlsl: saturating doubling multiply-subtract long; mirrors the vqdmlal
   group above but subtracts the doubled product (SQDMLSL builtins).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64x1_t __a, int32_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlsls_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}

/* vqdmulh: saturating doubling multiply returning high half, via the
   AArch64 SQDMULH builtins.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}

/* vqdmull: saturating doubling multiply long (result elements are twice
   the source width), via the AArch64 SQDMULL builtins.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqdmullv4hi (__a, __b);
}
/* vqdmull (continued): _high forms operate on the upper halves of the
   128-bit inputs (sqdmull2 builtins); _lane/_laneq take the second
   multiplicand from a vector lane; _n broadcasts a scalar.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqdmull2v8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_n_s16 (int16x4_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqdmullv2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqdmull2v4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_n_s32 (int32x2_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull_nv2si (__a, __b);
}

/* Scalar (single-element) forms.  */

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_s16 (int16_t __a, int16_t __b)
{
  return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}

/* NOTE(review): these two return int64x1_t rather than a plain int64_t;
   int64x1_t is a typedef of int64_t in this header, so the result is a
   scalar either way — confirm against the ACLE spec.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_s32 (int32_t __a, int32_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}

/* vqmovn: saturating narrow to half-width elements (sqmovn/uqmovn
   builtins).  The unsigned vector forms cast through the signed vector
   types expected by the builtins.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqmovn_s16 (int16x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqmovn_s32 (int32x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqmovn_s64 (int64x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovn_u16 (uint16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovn_u32 (uint32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovn_u64 (uint64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
}

/* Scalar (single-element) forms.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqmovnh_s16 (int16_t __a)
{
  return (int8_t) __builtin_aarch64_sqmovnhi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqmovns_s32 (int32_t __a)
{
  return (int16_t) __builtin_aarch64_sqmovnsi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovnd_s64 (int64x1_t __a)
{
  return (int32_t) __builtin_aarch64_sqmovndi (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqmovnh_u16 (uint16_t __a)
{
  return (uint8_t) __builtin_aarch64_uqmovnhi (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqmovns_u32 (uint32_t __a)
{
  return (uint16_t) __builtin_aarch64_uqmovnsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqmovnd_u64 (uint64x1_t __a)
{
  return (uint32_t) __builtin_aarch64_uqmovndi (__a);
}

/* vqmovun: signed input narrowed with unsigned saturation
   (sqmovun builtins).  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovun_s16 (int16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovun_s32 (int32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovun_s64 (int64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
}

/* Scalar forms.  NOTE(review): these return signed types although the
   operation saturates to an unsigned range — confirm against the ACLE
   spec, which declares them with unsigned return types.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqmovunh_s16 (int16_t __a)
{
  return (int8_t) __builtin_aarch64_sqmovunhi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqmovuns_s32 (int32_t __a)
{
  return (int16_t) __builtin_aarch64_sqmovunsi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovund_s64 (int64x1_t __a)
{
  return (int32_t) __builtin_aarch64_sqmovundi (__a);
}

/* vqneg: saturating negate (sqneg builtins); only the 64-bit-element
   and scalar forms appear in this part of the file.  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqnegq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqnegb_s8 (int8_t __a)
{
  return (int8_t) __builtin_aarch64_sqnegqi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqnegh_s16 (int16_t __a)
{
  return (int16_t) __builtin_aarch64_sqneghi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqnegs_s32 (int32_t __a)
{
  return (int32_t) __builtin_aarch64_sqnegsi (__a);
}

/* vqrdmulh: saturating rounding doubling multiply returning high half,
   by-lane and scalar forms (sqrdmulh builtins).  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}

/* vqrshl: saturating rounding shift left by a per-element count taken
   from __b (sqrshl/uqrshl builtins).  Note that even the unsigned
   variants take a *signed* shift-count vector.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqrshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqrshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqrshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqrshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqrshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqrshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqrshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
}

/* Scalar (single-element) forms.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshlb_s8 (int8_t __a, int8_t __b)
{
  return __builtin_aarch64_sqrshlqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshlh_s16 (int16_t __a, int16_t __b)
{
  return __builtin_aarch64_sqrshlhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqrshlsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshlb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqrshlqi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshlh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqrshlhi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshls_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqrshlsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
}

/* vqrshrn: saturating rounding shift right narrow by the immediate __b
   (sqrshrn/uqrshrn builtins).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
}

/* Scalar (single-element) forms.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshrnh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshrns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrnd_n_s64 (int64x1_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshrnh_n_u16 (uint16_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshrns_n_u32 (uint32_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
}

/* vqrshrun: signed input, rounding shift right narrow with unsigned
   saturation (sqrshrun builtins).  NOTE(review): the scalar forms below
   return signed types; ACLE declares them unsigned — confirm.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshrunh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshruns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrund_n_s64 (int64x1_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
}

/* vqshl: saturating shift left, both by-register (count from __b) and
   by-immediate (_n) forms (sqshl/uqshl builtins).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
}

/* Scalar (single-element) by-register forms.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlb_s8 (int8_t __a, int8_t __b)
{
  return __builtin_aarch64_sqshlqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshlh_s16 (int16_t __a, int16_t __b)
{
  return __builtin_aarch64_sqshlhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqshlsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqshlqi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshlh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqshlhi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshls_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqshlsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
}

/* By-immediate (_n) forms.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
}

/* Scalar (single-element) by-immediate forms.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlb_n_s8 (int8_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshlh_n_s16 (int16_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshls_n_s32 (int32_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshld_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlb_n_u8 (uint8_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_uqshl_nqi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshlh_n_u16 (uint16_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_uqshl_nhi (__a, __b);
}

/* Remaining scalar saturating-shift-left-by-immediate forms
   (uqshl builtins).  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshls_n_u32 (uint32_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_uqshl_nsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshld_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
}

/* vqshlu: signed input shifted left by immediate with unsigned
   saturation (sqshlu builtins).  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshlu_n_s8 (int8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshlu_n_s16 (int16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshlu_n_s32 (int32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshlu_n_s64 (int64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshluq_n_s8 (int8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshluq_n_s16 (int16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshluq_n_s32 (int32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshluq_n_s64 (int64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
}

/* Scalar forms.  NOTE(review): these return signed types although the
   result saturates to an unsigned range — ACLE declares them with
   unsigned return types; confirm before relying on the signedness.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlub_n_s8 (int8_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshluh_n_s16 (int16_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshlus_n_s32 (int32_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshlud_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
}

/* vqshrn: saturating shift right narrow by immediate
   (sqshrn/uqshrn builtins).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
}

/* Scalar (single-element) forms.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshrnh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshrns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshrnd_n_s64 (int64x1_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshrnh_n_u16 (uint16_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshrns_n_u32 (uint32_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshrnd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
}

/* vqshrun: signed input, shift right narrow with unsigned saturation
   (sqshrun builtins).  NOTE(review): scalar forms return signed types;
   ACLE declares them unsigned — confirm.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshrunh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshruns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshrund_n_s64 (int64x1_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
}

/* vqsub: saturating subtract — only the scalar (single-element) forms
   appear in this part of the file (sqsub/uqsub builtins).  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqsubb_s8 (int8_t __a, int8_t __b)
{
  return (int8_t) __builtin_aarch64_sqsubqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqsubh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqsubhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqsubs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqsubsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqsubd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqsubb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqsubqi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqsubh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqsubhi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqsubs_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqsubsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
}

/* vrecpe: floating-point reciprocal estimate (frecpe builtins).  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpes_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpesf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecped_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpedf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecpe_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frecpev2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frecpev4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpeq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frecpev2df (__a);
}

/* vrecps: floating-point reciprocal step.  The first intrinsic of this
   family continues in the next chunk of the file.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpss_f32 (float32_t __a, float32_t __b) 22422 { 22423 return __builtin_aarch64_frecpssf (__a, __b); 22424 } 22425 22426 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 22427 vrecpsd_f64 (float64_t __a, float64_t __b) 22428 { 22429 return __builtin_aarch64_frecpsdf (__a, __b); 22430 } 22431 22432 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 22433 vrecps_f32 (float32x2_t __a, float32x2_t __b) 22434 { 22435 return __builtin_aarch64_frecpsv2sf (__a, __b); 22436 } 22437 22438 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 22439 vrecpsq_f32 (float32x4_t __a, float32x4_t __b) 22440 { 22441 return __builtin_aarch64_frecpsv4sf (__a, __b); 22442 } 22443 22444 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 22445 vrecpsq_f64 (float64x2_t __a, float64x2_t __b) 22446 { 22447 return __builtin_aarch64_frecpsv2df (__a, __b); 22448 } 22449 22450 /* vrecpx */ 22451 22452 __extension__ static __inline float32_t __attribute__ ((__always_inline__)) 22453 vrecpxs_f32 (float32_t __a) 22454 { 22455 return __builtin_aarch64_frecpxsf (__a); 22456 } 22457 22458 __extension__ static __inline float64_t __attribute__ ((__always_inline__)) 22459 vrecpxd_f64 (float64_t __a) 22460 { 22461 return __builtin_aarch64_frecpxdf (__a); 22462 } 22463 22464 /* vrnd */ 22465 22466 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 22467 vrnd_f32 (float32x2_t __a) 22468 { 22469 return __builtin_aarch64_btruncv2sf (__a); 22470 } 22471 22472 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 22473 vrndq_f32 (float32x4_t __a) 22474 { 22475 return __builtin_aarch64_btruncv4sf (__a); 22476 } 22477 22478 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 22479 vrndq_f64 (float64x2_t __a) 22480 { 22481 return __builtin_aarch64_btruncv2df (__a); 22482 } 22483 22484 /* vrnda */ 22485 22486 __extension__ static 
__inline float32x2_t __attribute__ ((__always_inline__)) 22487 vrnda_f32 (float32x2_t __a) 22488 { 22489 return __builtin_aarch64_roundv2sf (__a); 22490 } 22491 22492 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 22493 vrndaq_f32 (float32x4_t __a) 22494 { 22495 return __builtin_aarch64_roundv4sf (__a); 22496 } 22497 22498 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 22499 vrndaq_f64 (float64x2_t __a) 22500 { 22501 return __builtin_aarch64_roundv2df (__a); 22502 } 22503 22504 /* vrndi */ 22505 22506 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 22507 vrndi_f32 (float32x2_t __a) 22508 { 22509 return __builtin_aarch64_nearbyintv2sf (__a); 22510 } 22511 22512 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 22513 vrndiq_f32 (float32x4_t __a) 22514 { 22515 return __builtin_aarch64_nearbyintv4sf (__a); 22516 } 22517 22518 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 22519 vrndiq_f64 (float64x2_t __a) 22520 { 22521 return __builtin_aarch64_nearbyintv2df (__a); 22522 } 22523 22524 /* vrndm */ 22525 22526 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 22527 vrndm_f32 (float32x2_t __a) 22528 { 22529 return __builtin_aarch64_floorv2sf (__a); 22530 } 22531 22532 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 22533 vrndmq_f32 (float32x4_t __a) 22534 { 22535 return __builtin_aarch64_floorv4sf (__a); 22536 } 22537 22538 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 22539 vrndmq_f64 (float64x2_t __a) 22540 { 22541 return __builtin_aarch64_floorv2df (__a); 22542 } 22543 22544 /* vrndn */ 22545 22546 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 22547 vrndn_f32 (float32x2_t __a) 22548 { 22549 return __builtin_aarch64_frintnv2sf (__a); 22550 } 22551 __extension__ static __inline float32x4_t __attribute__ 
((__always_inline__)) 22552 vrndnq_f32 (float32x4_t __a) 22553 { 22554 return __builtin_aarch64_frintnv4sf (__a); 22555 } 22556 22557 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 22558 vrndnq_f64 (float64x2_t __a) 22559 { 22560 return __builtin_aarch64_frintnv2df (__a); 22561 } 22562 22563 /* vrndp */ 22564 22565 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 22566 vrndp_f32 (float32x2_t __a) 22567 { 22568 return __builtin_aarch64_ceilv2sf (__a); 22569 } 22570 22571 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 22572 vrndpq_f32 (float32x4_t __a) 22573 { 22574 return __builtin_aarch64_ceilv4sf (__a); 22575 } 22576 22577 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 22578 vrndpq_f64 (float64x2_t __a) 22579 { 22580 return __builtin_aarch64_ceilv2df (__a); 22581 } 22582 22583 /* vrndx */ 22584 22585 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 22586 vrndx_f32 (float32x2_t __a) 22587 { 22588 return __builtin_aarch64_rintv2sf (__a); 22589 } 22590 22591 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 22592 vrndxq_f32 (float32x4_t __a) 22593 { 22594 return __builtin_aarch64_rintv4sf (__a); 22595 } 22596 22597 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 22598 vrndxq_f64 (float64x2_t __a) 22599 { 22600 return __builtin_aarch64_rintv2df (__a); 22601 } 22602 22603 /* vrshl */ 22604 22605 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 22606 vrshl_s8 (int8x8_t __a, int8x8_t __b) 22607 { 22608 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b); 22609 } 22610 22611 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 22612 vrshl_s16 (int16x4_t __a, int16x4_t __b) 22613 { 22614 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b); 22615 } 22616 22617 __extension__ static __inline int32x2_t 
__attribute__ ((__always_inline__)) 22618 vrshl_s32 (int32x2_t __a, int32x2_t __b) 22619 { 22620 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b); 22621 } 22622 22623 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 22624 vrshl_s64 (int64x1_t __a, int64x1_t __b) 22625 { 22626 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b); 22627 } 22628 22629 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 22630 vrshl_u8 (uint8x8_t __a, int8x8_t __b) 22631 { 22632 return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b); 22633 } 22634 22635 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 22636 vrshl_u16 (uint16x4_t __a, int16x4_t __b) 22637 { 22638 return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b); 22639 } 22640 22641 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 22642 vrshl_u32 (uint32x2_t __a, int32x2_t __b) 22643 { 22644 return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b); 22645 } 22646 22647 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 22648 vrshl_u64 (uint64x1_t __a, int64x1_t __b) 22649 { 22650 return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b); 22651 } 22652 22653 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 22654 vrshlq_s8 (int8x16_t __a, int8x16_t __b) 22655 { 22656 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b); 22657 } 22658 22659 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 22660 vrshlq_s16 (int16x8_t __a, int16x8_t __b) 22661 { 22662 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b); 22663 } 22664 22665 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 22666 vrshlq_s32 (int32x4_t __a, int32x4_t __b) 22667 { 22668 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b); 22669 } 22670 22671 __extension__ static __inline int64x2_t 
__attribute__ ((__always_inline__)) 22672 vrshlq_s64 (int64x2_t __a, int64x2_t __b) 22673 { 22674 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b); 22675 } 22676 22677 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 22678 vrshlq_u8 (uint8x16_t __a, int8x16_t __b) 22679 { 22680 return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b); 22681 } 22682 22683 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 22684 vrshlq_u16 (uint16x8_t __a, int16x8_t __b) 22685 { 22686 return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b); 22687 } 22688 22689 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 22690 vrshlq_u32 (uint32x4_t __a, int32x4_t __b) 22691 { 22692 return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b); 22693 } 22694 22695 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 22696 vrshlq_u64 (uint64x2_t __a, int64x2_t __b) 22697 { 22698 return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b); 22699 } 22700 22701 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 22702 vrshld_s64 (int64x1_t __a, int64x1_t __b) 22703 { 22704 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b); 22705 } 22706 22707 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 22708 vrshld_u64 (uint64x1_t __a, uint64x1_t __b) 22709 { 22710 return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b); 22711 } 22712 22713 /* vrshr */ 22714 22715 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 22716 vrshr_n_s8 (int8x8_t __a, const int __b) 22717 { 22718 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b); 22719 } 22720 22721 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 22722 vrshr_n_s16 (int16x4_t __a, const int __b) 22723 { 22724 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b); 22725 } 22726 22727 
/* vrshr (continued): rounding shift right by immediate.  Unsigned forms
   cast through the signed builtin argument types.  */

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshrd_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshrd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
}

/* vrsra: rounding shift right by immediate and accumulate into the
   first operand.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
						    (int8x8_t) __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
						     (int16x4_t) __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
						     (int32x2_t) __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
						   (int64x1_t) __b, __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
}

/* vrsra (continued): Q-register and scalar forms.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
						      (int8x16_t) __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
						     (int16x8_t) __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
						     (int32x4_t) __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
						     (int64x2_t) __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
}

/* Crypto extension intrinsics, available only when the target provides
   the optional crypto instructions.
   NOTE(review): these lack the __extension__ / __always_inline__
   decoration used everywhere else in this file -- presumably an
   oversight; confirm against the rest of the header before changing.  */
#ifdef __ARM_FEATURE_CRYPTO

/* vsha1: SHA-1 hash update and schedule operations.  */

static __inline uint32x4_t
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
}
static __inline uint32x4_t
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
}
static __inline uint32x4_t
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32_t
vsha1h_u32 (uint32_t hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
}

static __inline uint32x4_t
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
}

static __inline uint32x4_t
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
}

/* SHA-256 hash update and schedule operations.  */

static __inline uint32x4_t
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
}

static __inline uint32x4_t
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
}

static __inline uint32x4_t
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
}

static __inline uint32x4_t
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
}

/* Polynomial (carry-less) 64x64 -> 128-bit multiply.  */

static __inline poly128_t
vmull_p64 (poly64_t a, poly64_t b)
{
  return
    __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}

static __inline poly128_t
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
{
  return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}

#endif

/* vshl: shift left by immediate (vshl_n_*), then by per-element register
   counts (vshl_*), then widening shift left (vshll).  Unsigned forms
   cast through the signed builtin types.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshld_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshld_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
}

/* vshll_high: widening shift left of the high half of the input.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_high_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_high_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_high_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_high_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_high_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_high_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
}

/* vshll: widening shift left of a 64-bit input vector.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
}

/* vshr: arithmetic/logical shift right by immediate.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshr_n_s32 (int32x2_t __a, const int __b)
23324 { 23325 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b); 23326 } 23327 23328 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 23329 vshr_n_s64 (int64x1_t __a, const int __b) 23330 { 23331 return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b); 23332 } 23333 23334 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 23335 vshr_n_u8 (uint8x8_t __a, const int __b) 23336 { 23337 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b); 23338 } 23339 23340 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 23341 vshr_n_u16 (uint16x4_t __a, const int __b) 23342 { 23343 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b); 23344 } 23345 23346 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 23347 vshr_n_u32 (uint32x2_t __a, const int __b) 23348 { 23349 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b); 23350 } 23351 23352 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 23353 vshr_n_u64 (uint64x1_t __a, const int __b) 23354 { 23355 return __builtin_aarch64_lshr_simddi_uus ( __a, __b); 23356 } 23357 23358 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 23359 vshrq_n_s8 (int8x16_t __a, const int __b) 23360 { 23361 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b); 23362 } 23363 23364 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 23365 vshrq_n_s16 (int16x8_t __a, const int __b) 23366 { 23367 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b); 23368 } 23369 23370 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 23371 vshrq_n_s32 (int32x4_t __a, const int __b) 23372 { 23373 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b); 23374 } 23375 23376 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 23377 vshrq_n_s64 (int64x2_t __a, const int __b) 23378 { 
23379 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b); 23380 } 23381 23382 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 23383 vshrq_n_u8 (uint8x16_t __a, const int __b) 23384 { 23385 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b); 23386 } 23387 23388 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 23389 vshrq_n_u16 (uint16x8_t __a, const int __b) 23390 { 23391 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b); 23392 } 23393 23394 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 23395 vshrq_n_u32 (uint32x4_t __a, const int __b) 23396 { 23397 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b); 23398 } 23399 23400 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 23401 vshrq_n_u64 (uint64x2_t __a, const int __b) 23402 { 23403 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b); 23404 } 23405 23406 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 23407 vshrd_n_s64 (int64x1_t __a, const int __b) 23408 { 23409 return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b); 23410 } 23411 23412 __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 23413 vshrd_n_u64 (uint64_t __a, const int __b) 23414 { 23415 return __builtin_aarch64_lshr_simddi_uus (__a, __b); 23416 } 23417 23418 /* vsli */ 23419 23420 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 23421 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) 23422 { 23423 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c); 23424 } 23425 23426 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 23427 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) 23428 { 23429 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c); 23430 } 23431 23432 __extension__ static __inline int32x2_t __attribute__ 
((__always_inline__)) 23433 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) 23434 { 23435 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c); 23436 } 23437 23438 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 23439 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) 23440 { 23441 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c); 23442 } 23443 23444 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 23445 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) 23446 { 23447 return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a, 23448 (int8x8_t) __b, __c); 23449 } 23450 23451 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 23452 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) 23453 { 23454 return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a, 23455 (int16x4_t) __b, __c); 23456 } 23457 23458 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 23459 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) 23460 { 23461 return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a, 23462 (int32x2_t) __b, __c); 23463 } 23464 23465 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 23466 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) 23467 { 23468 return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a, 23469 (int64x1_t) __b, __c); 23470 } 23471 23472 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 23473 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) 23474 { 23475 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c); 23476 } 23477 23478 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 23479 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) 23480 { 23481 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c); 23482 } 23483 23484 __extension__ static 
__inline int32x4_t __attribute__ ((__always_inline__)) 23485 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) 23486 { 23487 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c); 23488 } 23489 23490 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 23491 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) 23492 { 23493 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c); 23494 } 23495 23496 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 23497 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) 23498 { 23499 return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a, 23500 (int8x16_t) __b, __c); 23501 } 23502 23503 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 23504 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) 23505 { 23506 return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a, 23507 (int16x8_t) __b, __c); 23508 } 23509 23510 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 23511 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) 23512 { 23513 return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a, 23514 (int32x4_t) __b, __c); 23515 } 23516 23517 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 23518 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) 23519 { 23520 return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a, 23521 (int64x2_t) __b, __c); 23522 } 23523 23524 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 23525 vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) 23526 { 23527 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c); 23528 } 23529 23530 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 23531 vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) 23532 { 23533 return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, 
__b, __c); 23534 } 23535 23536 /* vsqadd */ 23537 23538 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 23539 vsqadd_u8 (uint8x8_t __a, int8x8_t __b) 23540 { 23541 return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a, 23542 (int8x8_t) __b); 23543 } 23544 23545 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 23546 vsqadd_u16 (uint16x4_t __a, int16x4_t __b) 23547 { 23548 return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a, 23549 (int16x4_t) __b); 23550 } 23551 23552 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 23553 vsqadd_u32 (uint32x2_t __a, int32x2_t __b) 23554 { 23555 return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a, 23556 (int32x2_t) __b); 23557 } 23558 23559 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 23560 vsqadd_u64 (uint64x1_t __a, int64x1_t __b) 23561 { 23562 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); 23563 } 23564 23565 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 23566 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) 23567 { 23568 return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a, 23569 (int8x16_t) __b); 23570 } 23571 23572 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 23573 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) 23574 { 23575 return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a, 23576 (int16x8_t) __b); 23577 } 23578 23579 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 23580 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) 23581 { 23582 return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a, 23583 (int32x4_t) __b); 23584 } 23585 23586 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 23587 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) 23588 { 23589 return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a, 
23590 (int64x2_t) __b); 23591 } 23592 23593 __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) 23594 vsqaddb_u8 (uint8_t __a, int8_t __b) 23595 { 23596 return (uint8_t) __builtin_aarch64_usqaddqi ((int8_t) __a, __b); 23597 } 23598 23599 __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) 23600 vsqaddh_u16 (uint16_t __a, int16_t __b) 23601 { 23602 return (uint16_t) __builtin_aarch64_usqaddhi ((int16_t) __a, __b); 23603 } 23604 23605 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 23606 vsqadds_u32 (uint32_t __a, int32_t __b) 23607 { 23608 return (uint32_t) __builtin_aarch64_usqaddsi ((int32_t) __a, __b); 23609 } 23610 23611 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 23612 vsqaddd_u64 (uint64x1_t __a, int64x1_t __b) 23613 { 23614 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); 23615 } 23616 23617 /* vsqrt */ 23618 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 23619 vsqrt_f32 (float32x2_t a) 23620 { 23621 return __builtin_aarch64_sqrtv2sf (a); 23622 } 23623 23624 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 23625 vsqrtq_f32 (float32x4_t a) 23626 { 23627 return __builtin_aarch64_sqrtv4sf (a); 23628 } 23629 23630 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) 23631 vsqrtq_f64 (float64x2_t a) 23632 { 23633 return __builtin_aarch64_sqrtv2df (a); 23634 } 23635 23636 /* vsra */ 23637 23638 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 23639 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) 23640 { 23641 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c); 23642 } 23643 23644 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 23645 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) 23646 { 23647 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c); 23648 } 23649 23650 
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 23651 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) 23652 { 23653 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c); 23654 } 23655 23656 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 23657 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) 23658 { 23659 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c); 23660 } 23661 23662 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 23663 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) 23664 { 23665 return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a, 23666 (int8x8_t) __b, __c); 23667 } 23668 23669 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 23670 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) 23671 { 23672 return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a, 23673 (int16x4_t) __b, __c); 23674 } 23675 23676 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 23677 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) 23678 { 23679 return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a, 23680 (int32x2_t) __b, __c); 23681 } 23682 23683 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 23684 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) 23685 { 23686 return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a, 23687 (int64x1_t) __b, __c); 23688 } 23689 23690 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 23691 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) 23692 { 23693 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c); 23694 } 23695 23696 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 23697 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) 23698 { 23699 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, 
__b, __c); 23700 } 23701 23702 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 23703 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) 23704 { 23705 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c); 23706 } 23707 23708 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 23709 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) 23710 { 23711 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c); 23712 } 23713 23714 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 23715 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) 23716 { 23717 return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a, 23718 (int8x16_t) __b, __c); 23719 } 23720 23721 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 23722 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) 23723 { 23724 return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a, 23725 (int16x8_t) __b, __c); 23726 } 23727 23728 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 23729 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) 23730 { 23731 return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a, 23732 (int32x4_t) __b, __c); 23733 } 23734 23735 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 23736 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) 23737 { 23738 return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a, 23739 (int64x2_t) __b, __c); 23740 } 23741 23742 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 23743 vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) 23744 { 23745 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c); 23746 } 23747 23748 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 23749 vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) 23750 { 23751 
return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c); 23752 } 23753 23754 /* vsri */ 23755 23756 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 23757 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) 23758 { 23759 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c); 23760 } 23761 23762 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 23763 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) 23764 { 23765 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c); 23766 } 23767 23768 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 23769 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) 23770 { 23771 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c); 23772 } 23773 23774 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 23775 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) 23776 { 23777 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c); 23778 } 23779 23780 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 23781 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) 23782 { 23783 return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a, 23784 (int8x8_t) __b, __c); 23785 } 23786 23787 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 23788 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) 23789 { 23790 return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a, 23791 (int16x4_t) __b, __c); 23792 } 23793 23794 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 23795 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) 23796 { 23797 return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a, 23798 (int32x2_t) __b, __c); 23799 } 23800 23801 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 23802 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int 
__c) 23803 { 23804 return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a, 23805 (int64x1_t) __b, __c); 23806 } 23807 23808 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 23809 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) 23810 { 23811 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c); 23812 } 23813 23814 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 23815 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) 23816 { 23817 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c); 23818 } 23819 23820 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 23821 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) 23822 { 23823 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c); 23824 } 23825 23826 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 23827 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) 23828 { 23829 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c); 23830 } 23831 23832 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 23833 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) 23834 { 23835 return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a, 23836 (int8x16_t) __b, __c); 23837 } 23838 23839 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 23840 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) 23841 { 23842 return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a, 23843 (int16x8_t) __b, __c); 23844 } 23845 23846 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 23847 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) 23848 { 23849 return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a, 23850 (int32x4_t) __b, __c); 23851 } 23852 23853 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 23854 
vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) 23855 { 23856 return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a, 23857 (int64x2_t) __b, __c); 23858 } 23859 23860 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 23861 vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) 23862 { 23863 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c); 23864 } 23865 23866 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 23867 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) 23868 { 23869 return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c); 23870 } 23871 23872 /* vst1 */ 23873 23874 __extension__ static __inline void __attribute__ ((__always_inline__)) 23875 vst1_f32 (float32_t *a, float32x2_t b) 23876 { 23877 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b); 23878 } 23879 23880 __extension__ static __inline void __attribute__ ((__always_inline__)) 23881 vst1_f64 (float64_t *a, float64x1_t b) 23882 { 23883 *a = b; 23884 } 23885 23886 __extension__ static __inline void __attribute__ ((__always_inline__)) 23887 vst1_p8 (poly8_t *a, poly8x8_t b) 23888 { 23889 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, 23890 (int8x8_t) b); 23891 } 23892 23893 __extension__ static __inline void __attribute__ ((__always_inline__)) 23894 vst1_p16 (poly16_t *a, poly16x4_t b) 23895 { 23896 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, 23897 (int16x4_t) b); 23898 } 23899 23900 __extension__ static __inline void __attribute__ ((__always_inline__)) 23901 vst1_s8 (int8_t *a, int8x8_t b) 23902 { 23903 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b); 23904 } 23905 23906 __extension__ static __inline void __attribute__ ((__always_inline__)) 23907 vst1_s16 (int16_t *a, int16x4_t b) 23908 { 23909 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b); 23910 } 23911 23912 __extension__ static __inline void __attribute__ 
((__always_inline__)) 23913 vst1_s32 (int32_t *a, int32x2_t b) 23914 { 23915 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b); 23916 } 23917 23918 __extension__ static __inline void __attribute__ ((__always_inline__)) 23919 vst1_s64 (int64_t *a, int64x1_t b) 23920 { 23921 *a = b; 23922 } 23923 23924 __extension__ static __inline void __attribute__ ((__always_inline__)) 23925 vst1_u8 (uint8_t *a, uint8x8_t b) 23926 { 23927 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, 23928 (int8x8_t) b); 23929 } 23930 23931 __extension__ static __inline void __attribute__ ((__always_inline__)) 23932 vst1_u16 (uint16_t *a, uint16x4_t b) 23933 { 23934 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, 23935 (int16x4_t) b); 23936 } 23937 23938 __extension__ static __inline void __attribute__ ((__always_inline__)) 23939 vst1_u32 (uint32_t *a, uint32x2_t b) 23940 { 23941 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, 23942 (int32x2_t) b); 23943 } 23944 23945 __extension__ static __inline void __attribute__ ((__always_inline__)) 23946 vst1_u64 (uint64_t *a, uint64x1_t b) 23947 { 23948 *a = b; 23949 } 23950 23951 __extension__ static __inline void __attribute__ ((__always_inline__)) 23952 vst1q_f32 (float32_t *a, float32x4_t b) 23953 { 23954 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b); 23955 } 23956 23957 __extension__ static __inline void __attribute__ ((__always_inline__)) 23958 vst1q_f64 (float64_t *a, float64x2_t b) 23959 { 23960 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); 23961 } 23962 23963 /* vst1q */ 23964 23965 __extension__ static __inline void __attribute__ ((__always_inline__)) 23966 vst1q_p8 (poly8_t *a, poly8x16_t b) 23967 { 23968 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, 23969 (int8x16_t) b); 23970 } 23971 23972 __extension__ static __inline void __attribute__ ((__always_inline__)) 23973 vst1q_p16 (poly16_t *a, poly16x8_t b) 23974 { 23975 __builtin_aarch64_st1v8hi 
((__builtin_aarch64_simd_hi *) a, 23976 (int16x8_t) b); 23977 } 23978 23979 __extension__ static __inline void __attribute__ ((__always_inline__)) 23980 vst1q_s8 (int8_t *a, int8x16_t b) 23981 { 23982 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b); 23983 } 23984 23985 __extension__ static __inline void __attribute__ ((__always_inline__)) 23986 vst1q_s16 (int16_t *a, int16x8_t b) 23987 { 23988 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b); 23989 } 23990 23991 __extension__ static __inline void __attribute__ ((__always_inline__)) 23992 vst1q_s32 (int32_t *a, int32x4_t b) 23993 { 23994 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b); 23995 } 23996 23997 __extension__ static __inline void __attribute__ ((__always_inline__)) 23998 vst1q_s64 (int64_t *a, int64x2_t b) 23999 { 24000 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b); 24001 } 24002 24003 __extension__ static __inline void __attribute__ ((__always_inline__)) 24004 vst1q_u8 (uint8_t *a, uint8x16_t b) 24005 { 24006 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, 24007 (int8x16_t) b); 24008 } 24009 24010 __extension__ static __inline void __attribute__ ((__always_inline__)) 24011 vst1q_u16 (uint16_t *a, uint16x8_t b) 24012 { 24013 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, 24014 (int16x8_t) b); 24015 } 24016 24017 __extension__ static __inline void __attribute__ ((__always_inline__)) 24018 vst1q_u32 (uint32_t *a, uint32x4_t b) 24019 { 24020 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, 24021 (int32x4_t) b); 24022 } 24023 24024 __extension__ static __inline void __attribute__ ((__always_inline__)) 24025 vst1q_u64 (uint64_t *a, uint64x2_t b) 24026 { 24027 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, 24028 (int64x2_t) b); 24029 } 24030 24031 /* vstn */ 24032 24033 __extension__ static __inline void 24034 vst2_s64 (int64_t * __a, int64x1x2_t val) 24035 { 24036 __builtin_aarch64_simd_oi __o; 24037 
int64x2x2_t temp; 24038 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); 24039 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); 24040 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); 24041 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); 24042 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); 24043 } 24044 24045 __extension__ static __inline void 24046 vst2_u64 (uint64_t * __a, uint64x1x2_t val) 24047 { 24048 __builtin_aarch64_simd_oi __o; 24049 uint64x2x2_t temp; 24050 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); 24051 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); 24052 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); 24053 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); 24054 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); 24055 } 24056 24057 __extension__ static __inline void 24058 vst2_f64 (float64_t * __a, float64x1x2_t val) 24059 { 24060 __builtin_aarch64_simd_oi __o; 24061 float64x2x2_t temp; 24062 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); 24063 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); 24064 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); 24065 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); 24066 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); 24067 } 24068 24069 __extension__ static __inline void 24070 vst2_s8 (int8_t * __a, int8x8x2_t val) 24071 { 24072 __builtin_aarch64_simd_oi __o; 24073 int8x16x2_t temp; 24074 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); 24075 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); 24076 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); 24077 
__o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); 24078 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); 24079 } 24080 24081 __extension__ static __inline void __attribute__ ((__always_inline__)) 24082 vst2_p8 (poly8_t * __a, poly8x8x2_t val) 24083 { 24084 __builtin_aarch64_simd_oi __o; 24085 poly8x16x2_t temp; 24086 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); 24087 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); 24088 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); 24089 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); 24090 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); 24091 } 24092 24093 __extension__ static __inline void __attribute__ ((__always_inline__)) 24094 vst2_s16 (int16_t * __a, int16x4x2_t val) 24095 { 24096 __builtin_aarch64_simd_oi __o; 24097 int16x8x2_t temp; 24098 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); 24099 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); 24100 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); 24101 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); 24102 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); 24103 } 24104 24105 __extension__ static __inline void __attribute__ ((__always_inline__)) 24106 vst2_p16 (poly16_t * __a, poly16x4x2_t val) 24107 { 24108 __builtin_aarch64_simd_oi __o; 24109 poly16x8x2_t temp; 24110 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); 24111 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); 24112 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); 24113 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); 24114 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); 24115 } 24116 
24117 __extension__ static __inline void __attribute__ ((__always_inline__)) 24118 vst2_s32 (int32_t * __a, int32x2x2_t val) 24119 { 24120 __builtin_aarch64_simd_oi __o; 24121 int32x4x2_t temp; 24122 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); 24123 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); 24124 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); 24125 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); 24126 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); 24127 } 24128 24129 __extension__ static __inline void __attribute__ ((__always_inline__)) 24130 vst2_u8 (uint8_t * __a, uint8x8x2_t val) 24131 { 24132 __builtin_aarch64_simd_oi __o; 24133 uint8x16x2_t temp; 24134 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); 24135 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); 24136 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); 24137 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); 24138 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); 24139 } 24140 24141 __extension__ static __inline void __attribute__ ((__always_inline__)) 24142 vst2_u16 (uint16_t * __a, uint16x4x2_t val) 24143 { 24144 __builtin_aarch64_simd_oi __o; 24145 uint16x8x2_t temp; 24146 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); 24147 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); 24148 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); 24149 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); 24150 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); 24151 } 24152 24153 __extension__ static __inline void __attribute__ ((__always_inline__)) 24154 vst2_u32 (uint32_t * __a, uint32x2x2_t val) 24155 { 24156 
__builtin_aarch64_simd_oi __o; 24157 uint32x4x2_t temp; 24158 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); 24159 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); 24160 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); 24161 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); 24162 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); 24163 } 24164 24165 __extension__ static __inline void __attribute__ ((__always_inline__)) 24166 vst2_f32 (float32_t * __a, float32x2x2_t val) 24167 { 24168 __builtin_aarch64_simd_oi __o; 24169 float32x4x2_t temp; 24170 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); 24171 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); 24172 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); 24173 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); 24174 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); 24175 } 24176 24177 __extension__ static __inline void __attribute__ ((__always_inline__)) 24178 vst2q_s8 (int8_t * __a, int8x16x2_t val) 24179 { 24180 __builtin_aarch64_simd_oi __o; 24181 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); 24182 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); 24183 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); 24184 } 24185 24186 __extension__ static __inline void __attribute__ ((__always_inline__)) 24187 vst2q_p8 (poly8_t * __a, poly8x16x2_t val) 24188 { 24189 __builtin_aarch64_simd_oi __o; 24190 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); 24191 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); 24192 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); 24193 } 24194 24195 __extension__ static __inline void __attribute__ 
((__always_inline__)) 24196 vst2q_s16 (int16_t * __a, int16x8x2_t val) 24197 { 24198 __builtin_aarch64_simd_oi __o; 24199 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); 24200 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); 24201 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); 24202 } 24203 24204 __extension__ static __inline void __attribute__ ((__always_inline__)) 24205 vst2q_p16 (poly16_t * __a, poly16x8x2_t val) 24206 { 24207 __builtin_aarch64_simd_oi __o; 24208 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); 24209 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); 24210 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); 24211 } 24212 24213 __extension__ static __inline void __attribute__ ((__always_inline__)) 24214 vst2q_s32 (int32_t * __a, int32x4x2_t val) 24215 { 24216 __builtin_aarch64_simd_oi __o; 24217 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); 24218 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); 24219 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); 24220 } 24221 24222 __extension__ static __inline void __attribute__ ((__always_inline__)) 24223 vst2q_s64 (int64_t * __a, int64x2x2_t val) 24224 { 24225 __builtin_aarch64_simd_oi __o; 24226 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); 24227 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); 24228 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); 24229 } 24230 24231 __extension__ static __inline void __attribute__ ((__always_inline__)) 24232 vst2q_u8 (uint8_t * __a, uint8x16x2_t val) 24233 { 24234 __builtin_aarch64_simd_oi __o; 24235 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); 24236 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); 24237 __builtin_aarch64_st2v16qi 
((__builtin_aarch64_simd_qi *) __a, __o); 24238 } 24239 24240 __extension__ static __inline void __attribute__ ((__always_inline__)) 24241 vst2q_u16 (uint16_t * __a, uint16x8x2_t val) 24242 { 24243 __builtin_aarch64_simd_oi __o; 24244 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); 24245 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); 24246 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); 24247 } 24248 24249 __extension__ static __inline void __attribute__ ((__always_inline__)) 24250 vst2q_u32 (uint32_t * __a, uint32x4x2_t val) 24251 { 24252 __builtin_aarch64_simd_oi __o; 24253 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); 24254 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); 24255 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); 24256 } 24257 24258 __extension__ static __inline void __attribute__ ((__always_inline__)) 24259 vst2q_u64 (uint64_t * __a, uint64x2x2_t val) 24260 { 24261 __builtin_aarch64_simd_oi __o; 24262 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); 24263 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); 24264 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); 24265 } 24266 24267 __extension__ static __inline void __attribute__ ((__always_inline__)) 24268 vst2q_f32 (float32_t * __a, float32x4x2_t val) 24269 { 24270 __builtin_aarch64_simd_oi __o; 24271 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0); 24272 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1); 24273 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); 24274 } 24275 24276 __extension__ static __inline void __attribute__ ((__always_inline__)) 24277 vst2q_f64 (float64_t * __a, float64x2x2_t val) 24278 { 24279 __builtin_aarch64_simd_oi __o; 24280 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0); 24281 
__o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1); 24282 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o); 24283 } 24284 24285 __extension__ static __inline void 24286 vst3_s64 (int64_t * __a, int64x1x3_t val) 24287 { 24288 __builtin_aarch64_simd_ci __o; 24289 int64x2x3_t temp; 24290 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); 24291 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); 24292 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); 24293 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); 24294 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); 24295 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); 24296 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); 24297 } 24298 24299 __extension__ static __inline void 24300 vst3_u64 (uint64_t * __a, uint64x1x3_t val) 24301 { 24302 __builtin_aarch64_simd_ci __o; 24303 uint64x2x3_t temp; 24304 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); 24305 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); 24306 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); 24307 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); 24308 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); 24309 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); 24310 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); 24311 } 24312 24313 __extension__ static __inline void 24314 vst3_f64 (float64_t * __a, float64x1x3_t val) 24315 { 24316 __builtin_aarch64_simd_ci __o; 24317 float64x2x3_t temp; 24318 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); 24319 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); 24320 temp.val[2] = 
vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); 24321 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); 24322 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); 24323 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); 24324 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o); 24325 } 24326 24327 __extension__ static __inline void 24328 vst3_s8 (int8_t * __a, int8x8x3_t val) 24329 { 24330 __builtin_aarch64_simd_ci __o; 24331 int8x16x3_t temp; 24332 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); 24333 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); 24334 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); 24335 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); 24336 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); 24337 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); 24338 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); 24339 } 24340 24341 __extension__ static __inline void __attribute__ ((__always_inline__)) 24342 vst3_p8 (poly8_t * __a, poly8x8x3_t val) 24343 { 24344 __builtin_aarch64_simd_ci __o; 24345 poly8x16x3_t temp; 24346 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); 24347 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); 24348 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); 24349 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); 24350 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); 24351 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); 24352 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); 24353 } 24354 24355 __extension__ static __inline void __attribute__ ((__always_inline__)) 24356 vst3_s16 
(int16_t * __a, int16x4x3_t val) 24357 { 24358 __builtin_aarch64_simd_ci __o; 24359 int16x8x3_t temp; 24360 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); 24361 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); 24362 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); 24363 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); 24364 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); 24365 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); 24366 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); 24367 } 24368 24369 __extension__ static __inline void __attribute__ ((__always_inline__)) 24370 vst3_p16 (poly16_t * __a, poly16x4x3_t val) 24371 { 24372 __builtin_aarch64_simd_ci __o; 24373 poly16x8x3_t temp; 24374 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); 24375 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); 24376 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); 24377 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); 24378 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); 24379 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); 24380 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); 24381 } 24382 24383 __extension__ static __inline void __attribute__ ((__always_inline__)) 24384 vst3_s32 (int32_t * __a, int32x2x3_t val) 24385 { 24386 __builtin_aarch64_simd_ci __o; 24387 int32x4x3_t temp; 24388 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); 24389 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); 24390 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); 24391 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); 24392 __o = 
__builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); 24393 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); 24394 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); 24395 } 24396 24397 __extension__ static __inline void __attribute__ ((__always_inline__)) 24398 vst3_u8 (uint8_t * __a, uint8x8x3_t val) 24399 { 24400 __builtin_aarch64_simd_ci __o; 24401 uint8x16x3_t temp; 24402 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); 24403 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); 24404 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); 24405 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); 24406 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); 24407 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); 24408 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); 24409 } 24410 24411 __extension__ static __inline void __attribute__ ((__always_inline__)) 24412 vst3_u16 (uint16_t * __a, uint16x4x3_t val) 24413 { 24414 __builtin_aarch64_simd_ci __o; 24415 uint16x8x3_t temp; 24416 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); 24417 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); 24418 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); 24419 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); 24420 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); 24421 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); 24422 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); 24423 } 24424 24425 __extension__ static __inline void __attribute__ ((__always_inline__)) 24426 vst3_u32 (uint32_t * __a, uint32x2x3_t val) 24427 { 24428 __builtin_aarch64_simd_ci __o; 24429 uint32x4x3_t temp; 24430 
temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); 24431 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); 24432 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); 24433 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); 24434 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); 24435 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); 24436 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); 24437 } 24438 24439 __extension__ static __inline void __attribute__ ((__always_inline__)) 24440 vst3_f32 (float32_t * __a, float32x2x3_t val) 24441 { 24442 __builtin_aarch64_simd_ci __o; 24443 float32x4x3_t temp; 24444 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); 24445 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); 24446 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); 24447 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); 24448 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); 24449 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); 24450 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); 24451 } 24452 24453 __extension__ static __inline void __attribute__ ((__always_inline__)) 24454 vst3q_s8 (int8_t * __a, int8x16x3_t val) 24455 { 24456 __builtin_aarch64_simd_ci __o; 24457 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); 24458 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); 24459 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); 24460 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); 24461 } 24462 24463 __extension__ static __inline void __attribute__ ((__always_inline__)) 24464 vst3q_p8 (poly8_t * __a, poly8x16x3_t val) 24465 { 
24466 __builtin_aarch64_simd_ci __o; 24467 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); 24468 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); 24469 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); 24470 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); 24471 } 24472 24473 __extension__ static __inline void __attribute__ ((__always_inline__)) 24474 vst3q_s16 (int16_t * __a, int16x8x3_t val) 24475 { 24476 __builtin_aarch64_simd_ci __o; 24477 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); 24478 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); 24479 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); 24480 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); 24481 } 24482 24483 __extension__ static __inline void __attribute__ ((__always_inline__)) 24484 vst3q_p16 (poly16_t * __a, poly16x8x3_t val) 24485 { 24486 __builtin_aarch64_simd_ci __o; 24487 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); 24488 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); 24489 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); 24490 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); 24491 } 24492 24493 __extension__ static __inline void __attribute__ ((__always_inline__)) 24494 vst3q_s32 (int32_t * __a, int32x4x3_t val) 24495 { 24496 __builtin_aarch64_simd_ci __o; 24497 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); 24498 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); 24499 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); 24500 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); 24501 } 24502 24503 __extension__ static __inline void __attribute__ ((__always_inline__)) 24504 vst3q_s64 (int64_t * __a, int64x2x3_t val) 24505 { 24506 
__builtin_aarch64_simd_ci __o; 24507 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); 24508 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); 24509 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); 24510 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); 24511 } 24512 24513 __extension__ static __inline void __attribute__ ((__always_inline__)) 24514 vst3q_u8 (uint8_t * __a, uint8x16x3_t val) 24515 { 24516 __builtin_aarch64_simd_ci __o; 24517 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); 24518 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); 24519 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); 24520 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); 24521 } 24522 24523 __extension__ static __inline void __attribute__ ((__always_inline__)) 24524 vst3q_u16 (uint16_t * __a, uint16x8x3_t val) 24525 { 24526 __builtin_aarch64_simd_ci __o; 24527 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); 24528 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); 24529 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); 24530 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); 24531 } 24532 24533 __extension__ static __inline void __attribute__ ((__always_inline__)) 24534 vst3q_u32 (uint32_t * __a, uint32x4x3_t val) 24535 { 24536 __builtin_aarch64_simd_ci __o; 24537 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); 24538 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); 24539 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); 24540 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); 24541 } 24542 24543 __extension__ static __inline void __attribute__ ((__always_inline__)) 24544 vst3q_u64 (uint64_t * __a, uint64x2x3_t val) 24545 { 24546 
__builtin_aarch64_simd_ci __o; 24547 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); 24548 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); 24549 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); 24550 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); 24551 } 24552 24553 __extension__ static __inline void __attribute__ ((__always_inline__)) 24554 vst3q_f32 (float32_t * __a, float32x4x3_t val) 24555 { 24556 __builtin_aarch64_simd_ci __o; 24557 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0); 24558 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1); 24559 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2); 24560 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); 24561 } 24562 24563 __extension__ static __inline void __attribute__ ((__always_inline__)) 24564 vst3q_f64 (float64_t * __a, float64x2x3_t val) 24565 { 24566 __builtin_aarch64_simd_ci __o; 24567 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0); 24568 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1); 24569 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2); 24570 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); 24571 } 24572 24573 __extension__ static __inline void 24574 vst4_s64 (int64_t * __a, int64x1x4_t val) 24575 { 24576 __builtin_aarch64_simd_xi __o; 24577 int64x2x4_t temp; 24578 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); 24579 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); 24580 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); 24581 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); 24582 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); 24583 __o = __builtin_aarch64_set_qregxiv2di (__o, 
(int64x2_t) temp.val[1], 1); 24584 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); 24585 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); 24586 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); 24587 } 24588 24589 __extension__ static __inline void 24590 vst4_u64 (uint64_t * __a, uint64x1x4_t val) 24591 { 24592 __builtin_aarch64_simd_xi __o; 24593 uint64x2x4_t temp; 24594 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); 24595 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); 24596 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); 24597 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); 24598 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); 24599 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); 24600 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); 24601 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); 24602 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); 24603 } 24604 24605 __extension__ static __inline void 24606 vst4_f64 (float64_t * __a, float64x1x4_t val) 24607 { 24608 __builtin_aarch64_simd_xi __o; 24609 float64x2x4_t temp; 24610 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); 24611 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); 24612 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); 24613 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); 24614 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); 24615 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); 24616 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); 24617 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) 
temp.val[3], 3); 24618 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); 24619 } 24620 24621 __extension__ static __inline void 24622 vst4_s8 (int8_t * __a, int8x8x4_t val) 24623 { 24624 __builtin_aarch64_simd_xi __o; 24625 int8x16x4_t temp; 24626 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); 24627 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); 24628 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); 24629 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); 24630 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); 24631 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); 24632 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); 24633 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); 24634 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); 24635 } 24636 24637 __extension__ static __inline void __attribute__ ((__always_inline__)) 24638 vst4_p8 (poly8_t * __a, poly8x8x4_t val) 24639 { 24640 __builtin_aarch64_simd_xi __o; 24641 poly8x16x4_t temp; 24642 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); 24643 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); 24644 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); 24645 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); 24646 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); 24647 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); 24648 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); 24649 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); 24650 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); 24651 } 24652 24653 __extension__ static __inline void __attribute__ 
((__always_inline__)) 24654 vst4_s16 (int16_t * __a, int16x4x4_t val) 24655 { 24656 __builtin_aarch64_simd_xi __o; 24657 int16x8x4_t temp; 24658 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); 24659 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); 24660 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); 24661 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); 24662 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); 24663 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); 24664 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); 24665 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); 24666 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); 24667 } 24668 24669 __extension__ static __inline void __attribute__ ((__always_inline__)) 24670 vst4_p16 (poly16_t * __a, poly16x4x4_t val) 24671 { 24672 __builtin_aarch64_simd_xi __o; 24673 poly16x8x4_t temp; 24674 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); 24675 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); 24676 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); 24677 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); 24678 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); 24679 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); 24680 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); 24681 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); 24682 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); 24683 } 24684 24685 __extension__ static __inline void __attribute__ ((__always_inline__)) 24686 vst4_s32 (int32_t * __a, int32x2x4_t val) 24687 { 24688 __builtin_aarch64_simd_xi __o; 
24689 int32x4x4_t temp; 24690 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); 24691 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); 24692 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); 24693 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); 24694 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); 24695 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); 24696 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); 24697 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); 24698 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); 24699 } 24700 24701 __extension__ static __inline void __attribute__ ((__always_inline__)) 24702 vst4_u8 (uint8_t * __a, uint8x8x4_t val) 24703 { 24704 __builtin_aarch64_simd_xi __o; 24705 uint8x16x4_t temp; 24706 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); 24707 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); 24708 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); 24709 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); 24710 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); 24711 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); 24712 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); 24713 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); 24714 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); 24715 } 24716 24717 __extension__ static __inline void __attribute__ ((__always_inline__)) 24718 vst4_u16 (uint16_t * __a, uint16x4x4_t val) 24719 { 24720 __builtin_aarch64_simd_xi __o; 24721 uint16x8x4_t temp; 24722 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); 24723 
temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); 24724 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); 24725 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); 24726 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); 24727 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); 24728 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); 24729 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); 24730 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); 24731 } 24732 24733 __extension__ static __inline void __attribute__ ((__always_inline__)) 24734 vst4_u32 (uint32_t * __a, uint32x2x4_t val) 24735 { 24736 __builtin_aarch64_simd_xi __o; 24737 uint32x4x4_t temp; 24738 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); 24739 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); 24740 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); 24741 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); 24742 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); 24743 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); 24744 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); 24745 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); 24746 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); 24747 } 24748 24749 __extension__ static __inline void __attribute__ ((__always_inline__)) 24750 vst4_f32 (float32_t * __a, float32x2x4_t val) 24751 { 24752 __builtin_aarch64_simd_xi __o; 24753 float32x4x4_t temp; 24754 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); 24755 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); 24756 temp.val[2] = 
vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); 24757 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); 24758 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); 24759 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); 24760 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); 24761 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3); 24762 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); 24763 } 24764 24765 __extension__ static __inline void __attribute__ ((__always_inline__)) 24766 vst4q_s8 (int8_t * __a, int8x16x4_t val) 24767 { 24768 __builtin_aarch64_simd_xi __o; 24769 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); 24770 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); 24771 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); 24772 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); 24773 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); 24774 } 24775 24776 __extension__ static __inline void __attribute__ ((__always_inline__)) 24777 vst4q_p8 (poly8_t * __a, poly8x16x4_t val) 24778 { 24779 __builtin_aarch64_simd_xi __o; 24780 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); 24781 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); 24782 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); 24783 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); 24784 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); 24785 } 24786 24787 __extension__ static __inline void __attribute__ ((__always_inline__)) 24788 vst4q_s16 (int16_t * __a, int16x8x4_t val) 24789 { 24790 __builtin_aarch64_simd_xi __o; 24791 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); 24792 __o = 
__builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); 24793 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); 24794 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); 24795 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); 24796 } 24797 24798 __extension__ static __inline void __attribute__ ((__always_inline__)) 24799 vst4q_p16 (poly16_t * __a, poly16x8x4_t val) 24800 { 24801 __builtin_aarch64_simd_xi __o; 24802 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); 24803 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); 24804 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); 24805 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); 24806 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); 24807 } 24808 24809 __extension__ static __inline void __attribute__ ((__always_inline__)) 24810 vst4q_s32 (int32_t * __a, int32x4x4_t val) 24811 { 24812 __builtin_aarch64_simd_xi __o; 24813 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); 24814 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); 24815 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); 24816 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); 24817 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); 24818 } 24819 24820 __extension__ static __inline void __attribute__ ((__always_inline__)) 24821 vst4q_s64 (int64_t * __a, int64x2x4_t val) 24822 { 24823 __builtin_aarch64_simd_xi __o; 24824 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); 24825 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); 24826 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); 24827 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); 24828 __builtin_aarch64_st4v2di 
((__builtin_aarch64_simd_di *) __a, __o); 24829 } 24830 24831 __extension__ static __inline void __attribute__ ((__always_inline__)) 24832 vst4q_u8 (uint8_t * __a, uint8x16x4_t val) 24833 { 24834 __builtin_aarch64_simd_xi __o; 24835 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); 24836 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); 24837 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); 24838 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); 24839 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); 24840 } 24841 24842 __extension__ static __inline void __attribute__ ((__always_inline__)) 24843 vst4q_u16 (uint16_t * __a, uint16x8x4_t val) 24844 { 24845 __builtin_aarch64_simd_xi __o; 24846 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); 24847 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); 24848 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); 24849 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); 24850 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); 24851 } 24852 24853 __extension__ static __inline void __attribute__ ((__always_inline__)) 24854 vst4q_u32 (uint32_t * __a, uint32x4x4_t val) 24855 { 24856 __builtin_aarch64_simd_xi __o; 24857 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); 24858 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); 24859 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); 24860 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); 24861 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); 24862 } 24863 24864 __extension__ static __inline void __attribute__ ((__always_inline__)) 24865 vst4q_u64 (uint64_t * __a, uint64x2x4_t val) 24866 { 24867 __builtin_aarch64_simd_xi __o; 24868 __o = 
__builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); 24869 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); 24870 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); 24871 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); 24872 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); 24873 } 24874 24875 __extension__ static __inline void __attribute__ ((__always_inline__)) 24876 vst4q_f32 (float32_t * __a, float32x4x4_t val) 24877 { 24878 __builtin_aarch64_simd_xi __o; 24879 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0); 24880 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1); 24881 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2); 24882 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3); 24883 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); 24884 } 24885 24886 __extension__ static __inline void __attribute__ ((__always_inline__)) 24887 vst4q_f64 (float64_t * __a, float64x2x4_t val) 24888 { 24889 __builtin_aarch64_simd_xi __o; 24890 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0); 24891 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1); 24892 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2); 24893 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3); 24894 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); 24895 } 24896 24897 /* vsub */ 24898 24899 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 24900 vsubd_s64 (int64x1_t __a, int64x1_t __b) 24901 { 24902 return __a - __b; 24903 } 24904 24905 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 24906 vsubd_u64 (uint64x1_t __a, uint64x1_t __b) 24907 { 24908 return __a - __b; 24909 } 24910 24911 /* vtbx1 */ 24912 24913 __extension__ static 
__inline int8x8_t __attribute__ ((__always_inline__)) 24914 vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx) 24915 { 24916 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), 24917 vmov_n_u8 (8)); 24918 int8x8_t __tbl = vtbl1_s8 (__tab, __idx); 24919 24920 return vbsl_s8 (__mask, __tbl, __r); 24921 } 24922 24923 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 24924 vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx) 24925 { 24926 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); 24927 uint8x8_t __tbl = vtbl1_u8 (__tab, __idx); 24928 24929 return vbsl_u8 (__mask, __tbl, __r); 24930 } 24931 24932 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 24933 vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx) 24934 { 24935 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); 24936 poly8x8_t __tbl = vtbl1_p8 (__tab, __idx); 24937 24938 return vbsl_p8 (__mask, __tbl, __r); 24939 } 24940 24941 /* vtbx3 */ 24942 24943 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 24944 vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx) 24945 { 24946 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), 24947 vmov_n_u8 (24)); 24948 int8x8_t __tbl = vtbl3_s8 (__tab, __idx); 24949 24950 return vbsl_s8 (__mask, __tbl, __r); 24951 } 24952 24953 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 24954 vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx) 24955 { 24956 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); 24957 uint8x8_t __tbl = vtbl3_u8 (__tab, __idx); 24958 24959 return vbsl_u8 (__mask, __tbl, __r); 24960 } 24961 24962 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 24963 vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx) 24964 { 24965 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); 24966 poly8x8_t __tbl = vtbl3_p8 (__tab, __idx); 24967 24968 return vbsl_p8 (__mask, __tbl, __r); 24969 } 24970 24971 
/* vtrn */ 24972 24973 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) 24974 vtrn_f32 (float32x2_t a, float32x2_t b) 24975 { 24976 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}; 24977 } 24978 24979 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) 24980 vtrn_p8 (poly8x8_t a, poly8x8_t b) 24981 { 24982 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)}; 24983 } 24984 24985 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) 24986 vtrn_p16 (poly16x4_t a, poly16x4_t b) 24987 { 24988 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)}; 24989 } 24990 24991 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) 24992 vtrn_s8 (int8x8_t a, int8x8_t b) 24993 { 24994 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)}; 24995 } 24996 24997 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) 24998 vtrn_s16 (int16x4_t a, int16x4_t b) 24999 { 25000 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)}; 25001 } 25002 25003 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) 25004 vtrn_s32 (int32x2_t a, int32x2_t b) 25005 { 25006 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)}; 25007 } 25008 25009 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) 25010 vtrn_u8 (uint8x8_t a, uint8x8_t b) 25011 { 25012 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)}; 25013 } 25014 25015 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) 25016 vtrn_u16 (uint16x4_t a, uint16x4_t b) 25017 { 25018 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)}; 25019 } 25020 25021 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) 25022 vtrn_u32 (uint32x2_t a, uint32x2_t b) 25023 { 25024 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)}; 25025 } 25026 25027 __extension__ static __inline float32x4x2_t 
__attribute__ ((__always_inline__)) 25028 vtrnq_f32 (float32x4_t a, float32x4_t b) 25029 { 25030 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)}; 25031 } 25032 25033 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) 25034 vtrnq_p8 (poly8x16_t a, poly8x16_t b) 25035 { 25036 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)}; 25037 } 25038 25039 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) 25040 vtrnq_p16 (poly16x8_t a, poly16x8_t b) 25041 { 25042 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)}; 25043 } 25044 25045 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) 25046 vtrnq_s8 (int8x16_t a, int8x16_t b) 25047 { 25048 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)}; 25049 } 25050 25051 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) 25052 vtrnq_s16 (int16x8_t a, int16x8_t b) 25053 { 25054 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)}; 25055 } 25056 25057 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) 25058 vtrnq_s32 (int32x4_t a, int32x4_t b) 25059 { 25060 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)}; 25061 } 25062 25063 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) 25064 vtrnq_u8 (uint8x16_t a, uint8x16_t b) 25065 { 25066 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)}; 25067 } 25068 25069 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) 25070 vtrnq_u16 (uint16x8_t a, uint16x8_t b) 25071 { 25072 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)}; 25073 } 25074 25075 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) 25076 vtrnq_u32 (uint32x4_t a, uint32x4_t b) 25077 { 25078 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)}; 25079 } 25080 25081 /* vtst */ 25082 25083 __extension__ static __inline uint8x8_t __attribute__ 
((__always_inline__)) 25084 vtst_s8 (int8x8_t __a, int8x8_t __b) 25085 { 25086 return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b); 25087 } 25088 25089 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 25090 vtst_s16 (int16x4_t __a, int16x4_t __b) 25091 { 25092 return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b); 25093 } 25094 25095 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 25096 vtst_s32 (int32x2_t __a, int32x2_t __b) 25097 { 25098 return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b); 25099 } 25100 25101 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 25102 vtst_s64 (int64x1_t __a, int64x1_t __b) 25103 { 25104 return (__a & __b) ? -1ll : 0ll; 25105 } 25106 25107 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 25108 vtst_u8 (uint8x8_t __a, uint8x8_t __b) 25109 { 25110 return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a, 25111 (int8x8_t) __b); 25112 } 25113 25114 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 25115 vtst_u16 (uint16x4_t __a, uint16x4_t __b) 25116 { 25117 return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a, 25118 (int16x4_t) __b); 25119 } 25120 25121 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 25122 vtst_u32 (uint32x2_t __a, uint32x2_t __b) 25123 { 25124 return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a, 25125 (int32x2_t) __b); 25126 } 25127 25128 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 25129 vtst_u64 (uint64x1_t __a, uint64x1_t __b) 25130 { 25131 return (__a & __b) ? 
-1ll : 0ll; 25132 } 25133 25134 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 25135 vtstq_s8 (int8x16_t __a, int8x16_t __b) 25136 { 25137 return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b); 25138 } 25139 25140 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 25141 vtstq_s16 (int16x8_t __a, int16x8_t __b) 25142 { 25143 return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b); 25144 } 25145 25146 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 25147 vtstq_s32 (int32x4_t __a, int32x4_t __b) 25148 { 25149 return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b); 25150 } 25151 25152 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 25153 vtstq_s64 (int64x2_t __a, int64x2_t __b) 25154 { 25155 return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b); 25156 } 25157 25158 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 25159 vtstq_u8 (uint8x16_t __a, uint8x16_t __b) 25160 { 25161 return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a, 25162 (int8x16_t) __b); 25163 } 25164 25165 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 25166 vtstq_u16 (uint16x8_t __a, uint16x8_t __b) 25167 { 25168 return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a, 25169 (int16x8_t) __b); 25170 } 25171 25172 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 25173 vtstq_u32 (uint32x4_t __a, uint32x4_t __b) 25174 { 25175 return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a, 25176 (int32x4_t) __b); 25177 } 25178 25179 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 25180 vtstq_u64 (uint64x2_t __a, uint64x2_t __b) 25181 { 25182 return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a, 25183 (int64x2_t) __b); 25184 } 25185 25186 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 25187 vtstd_s64 
(int64x1_t __a, int64x1_t __b) 25188 { 25189 return (__a & __b) ? -1ll : 0ll; 25190 } 25191 25192 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) 25193 vtstd_u64 (uint64x1_t __a, uint64x1_t __b) 25194 { 25195 return (__a & __b) ? -1ll : 0ll; 25196 } 25197 25198 /* vuqadd */ 25199 25200 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 25201 vuqadd_s8 (int8x8_t __a, uint8x8_t __b) 25202 { 25203 return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b); 25204 } 25205 25206 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 25207 vuqadd_s16 (int16x4_t __a, uint16x4_t __b) 25208 { 25209 return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b); 25210 } 25211 25212 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 25213 vuqadd_s32 (int32x2_t __a, uint32x2_t __b) 25214 { 25215 return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b); 25216 } 25217 25218 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 25219 vuqadd_s64 (int64x1_t __a, uint64x1_t __b) 25220 { 25221 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); 25222 } 25223 25224 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 25225 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) 25226 { 25227 return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b); 25228 } 25229 25230 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 25231 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) 25232 { 25233 return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b); 25234 } 25235 25236 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 25237 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) 25238 { 25239 return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b); 25240 } 25241 25242 __extension__ static __inline int64x2_t __attribute__ 
((__always_inline__)) 25243 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) 25244 { 25245 return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); 25246 } 25247 25248 __extension__ static __inline int8_t __attribute__ ((__always_inline__)) 25249 vuqaddb_s8 (int8_t __a, uint8_t __b) 25250 { 25251 return (int8_t) __builtin_aarch64_suqaddqi (__a, (int8_t) __b); 25252 } 25253 25254 __extension__ static __inline int16_t __attribute__ ((__always_inline__)) 25255 vuqaddh_s16 (int16_t __a, uint16_t __b) 25256 { 25257 return (int16_t) __builtin_aarch64_suqaddhi (__a, (int16_t) __b); 25258 } 25259 25260 __extension__ static __inline int32_t __attribute__ ((__always_inline__)) 25261 vuqadds_s32 (int32_t __a, uint32_t __b) 25262 { 25263 return (int32_t) __builtin_aarch64_suqaddsi (__a, (int32_t) __b); 25264 } 25265 25266 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 25267 vuqaddd_s64 (int64x1_t __a, uint64x1_t __b) 25268 { 25269 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); 25270 } 25271 25272 #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ 25273 __extension__ static __inline rettype \ 25274 __attribute__ ((__always_inline__)) \ 25275 v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \ 25276 { \ 25277 return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \ 25278 v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \ 25279 } 25280 25281 #define __INTERLEAVE_LIST(op) \ 25282 __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \ 25283 __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \ 25284 __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \ 25285 __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \ 25286 __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \ 25287 __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \ 25288 __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \ 25289 __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \ 25290 __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \ 
25291 __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \ 25292 __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \ 25293 __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \ 25294 __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \ 25295 __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \ 25296 __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \ 25297 __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \ 25298 __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \ 25299 __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q) 25300 25301 /* vuzp */ 25302 25303 __INTERLEAVE_LIST (uzp) 25304 25305 /* vzip */ 25306 25307 __INTERLEAVE_LIST (zip) 25308 25309 #undef __INTERLEAVE_LIST 25310 #undef __DEFINTERLEAVE 25311 25312 /* End of optimal implementations in approved order. */ 25313 25314 #undef __aarch64_vget_lane_any 25315 #undef __aarch64_vget_lane_f32 25316 #undef __aarch64_vget_lane_f64 25317 #undef __aarch64_vget_lane_p8 25318 #undef __aarch64_vget_lane_p16 25319 #undef __aarch64_vget_lane_s8 25320 #undef __aarch64_vget_lane_s16 25321 #undef __aarch64_vget_lane_s32 25322 #undef __aarch64_vget_lane_s64 25323 #undef __aarch64_vget_lane_u8 25324 #undef __aarch64_vget_lane_u16 25325 #undef __aarch64_vget_lane_u32 25326 #undef __aarch64_vget_lane_u64 25327 25328 #undef __aarch64_vgetq_lane_f32 25329 #undef __aarch64_vgetq_lane_f64 25330 #undef __aarch64_vgetq_lane_p8 25331 #undef __aarch64_vgetq_lane_p16 25332 #undef __aarch64_vgetq_lane_s8 25333 #undef __aarch64_vgetq_lane_s16 25334 #undef __aarch64_vgetq_lane_s32 25335 #undef __aarch64_vgetq_lane_s64 25336 #undef __aarch64_vgetq_lane_u8 25337 #undef __aarch64_vgetq_lane_u16 25338 #undef __aarch64_vgetq_lane_u32 25339 #undef __aarch64_vgetq_lane_u64 25340 25341 #undef __aarch64_vdup_lane_any 25342 #undef __aarch64_vdup_lane_f32 25343 #undef __aarch64_vdup_lane_f64 25344 #undef __aarch64_vdup_lane_p8 25345 #undef __aarch64_vdup_lane_p16 25346 #undef __aarch64_vdup_lane_s8 25347 #undef 
__aarch64_vdup_lane_s16 25348 #undef __aarch64_vdup_lane_s32 25349 #undef __aarch64_vdup_lane_s64 25350 #undef __aarch64_vdup_lane_u8 25351 #undef __aarch64_vdup_lane_u16 25352 #undef __aarch64_vdup_lane_u32 25353 #undef __aarch64_vdup_lane_u64 25354 #undef __aarch64_vdup_laneq_f32 25355 #undef __aarch64_vdup_laneq_f64 25356 #undef __aarch64_vdup_laneq_p8 25357 #undef __aarch64_vdup_laneq_p16 25358 #undef __aarch64_vdup_laneq_s8 25359 #undef __aarch64_vdup_laneq_s16 25360 #undef __aarch64_vdup_laneq_s32 25361 #undef __aarch64_vdup_laneq_s64 25362 #undef __aarch64_vdup_laneq_u8 25363 #undef __aarch64_vdup_laneq_u16 25364 #undef __aarch64_vdup_laneq_u32 25365 #undef __aarch64_vdup_laneq_u64 25366 #undef __aarch64_vdupq_lane_f32 25367 #undef __aarch64_vdupq_lane_f64 25368 #undef __aarch64_vdupq_lane_p8 25369 #undef __aarch64_vdupq_lane_p16 25370 #undef __aarch64_vdupq_lane_s8 25371 #undef __aarch64_vdupq_lane_s16 25372 #undef __aarch64_vdupq_lane_s32 25373 #undef __aarch64_vdupq_lane_s64 25374 #undef __aarch64_vdupq_lane_u8 25375 #undef __aarch64_vdupq_lane_u16 25376 #undef __aarch64_vdupq_lane_u32 25377 #undef __aarch64_vdupq_lane_u64 25378 #undef __aarch64_vdupq_laneq_f32 25379 #undef __aarch64_vdupq_laneq_f64 25380 #undef __aarch64_vdupq_laneq_p8 25381 #undef __aarch64_vdupq_laneq_p16 25382 #undef __aarch64_vdupq_laneq_s8 25383 #undef __aarch64_vdupq_laneq_s16 25384 #undef __aarch64_vdupq_laneq_s32 25385 #undef __aarch64_vdupq_laneq_s64 25386 #undef __aarch64_vdupq_laneq_u8 25387 #undef __aarch64_vdupq_laneq_u16 25388 #undef __aarch64_vdupq_laneq_u32 25389 #undef __aarch64_vdupq_laneq_u64 25390 25391 #endif 25392