Home | History | Annotate | Download | only in include
      1 /* Intrinsics for ST Microelectronics Loongson-2E/2F SIMD operations.
      2 
      3    Copyright (C) 2008-2014 Free Software Foundation, Inc.
      4    Contributed by CodeSourcery.
      5 
      6    This file is part of GCC.
      7 
      8    GCC is free software; you can redistribute it and/or modify it
      9    under the terms of the GNU General Public License as published
     10    by the Free Software Foundation; either version 3, or (at your
     11    option) any later version.
     12 
     13    GCC is distributed in the hope that it will be useful, but WITHOUT
     14    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     16    License for more details.
     17 
     18    Under Section 7 of GPL version 3, you are granted additional
     19    permissions described in the GCC Runtime Library Exception, version
     20    3.1, as published by the Free Software Foundation.
     21 
     22    You should have received a copy of the GNU General Public License and
     23    a copy of the GCC Runtime Library Exception along with this program;
     24    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     25    <http://www.gnu.org/licenses/>.  */
     26 
     27 #ifndef _GCC_LOONGSON_H
     28 #define _GCC_LOONGSON_H
     29 
     30 #if !defined(__mips_loongson_vector_rev)
     31 # error "You must select -march=loongson2e or -march=loongson2f to use loongson.h"
     32 #endif
     33 
     34 #ifdef __cplusplus
     35 extern "C" {
     36 #endif
     37 
     38 #include <stdint.h>
     39 
     40 /* Vectors of unsigned bytes, halfwords and words.  */
     41 typedef uint8_t uint8x8_t __attribute__((vector_size (8)));
     42 typedef uint16_t uint16x4_t __attribute__((vector_size (8)));
     43 typedef uint32_t uint32x2_t __attribute__((vector_size (8)));
     44 
     45 /* Vectors of signed bytes, halfwords and words.  */
     46 typedef int8_t int8x8_t __attribute__((vector_size (8)));
     47 typedef int16_t int16x4_t __attribute__((vector_size (8)));
     48 typedef int32_t int32x2_t __attribute__((vector_size (8)));
     49 
     50 /* SIMD intrinsics.
     51    Unless otherwise noted, calls to the functions below will expand into
     52    precisely one machine instruction, modulo any moves required to
     53    satisfy register allocation constraints.  */
     54 
     55 /* Pack with signed saturation.  */
     56 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     57 packsswh (int32x2_t s, int32x2_t t)
     58 {
     59   return __builtin_loongson_packsswh (s, t);
     60 }
     61 
     62 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     63 packsshb (int16x4_t s, int16x4_t t)
     64 {
     65   return __builtin_loongson_packsshb (s, t);
     66 }
     67 
     68 /* Pack with unsigned saturation.  */
     69 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     70 packushb (uint16x4_t s, uint16x4_t t)
     71 {
     72   return __builtin_loongson_packushb (s, t);
     73 }
     74 
     75 /* Vector addition, treating overflow by wraparound.  */
     76 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     77 paddw_u (uint32x2_t s, uint32x2_t t)
     78 {
     79   return __builtin_loongson_paddw_u (s, t);
     80 }
     81 
     82 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     83 paddh_u (uint16x4_t s, uint16x4_t t)
     84 {
     85   return __builtin_loongson_paddh_u (s, t);
     86 }
     87 
     88 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     89 paddb_u (uint8x8_t s, uint8x8_t t)
     90 {
     91   return __builtin_loongson_paddb_u (s, t);
     92 }
     93 
     94 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     95 paddw_s (int32x2_t s, int32x2_t t)
     96 {
     97   return __builtin_loongson_paddw_s (s, t);
     98 }
     99 
    100 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    101 paddh_s (int16x4_t s, int16x4_t t)
    102 {
    103   return __builtin_loongson_paddh_s (s, t);
    104 }
    105 
    106 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    107 paddb_s (int8x8_t s, int8x8_t t)
    108 {
    109   return __builtin_loongson_paddb_s (s, t);
    110 }
    111 
    112 /* Addition of doubleword integers, treating overflow by wraparound.  */
    113 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
    114 paddd_u (uint64_t s, uint64_t t)
    115 {
    116   return __builtin_loongson_paddd_u (s, t);
    117 }
    118 
    119 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
    120 paddd_s (int64_t s, int64_t t)
    121 {
    122   return __builtin_loongson_paddd_s (s, t);
    123 }
    124 
    125 /* Vector addition, treating overflow by signed saturation.  */
    126 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    127 paddsh (int16x4_t s, int16x4_t t)
    128 {
    129   return __builtin_loongson_paddsh (s, t);
    130 }
    131 
    132 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    133 paddsb (int8x8_t s, int8x8_t t)
    134 {
    135   return __builtin_loongson_paddsb (s, t);
    136 }
    137 
    138 /* Vector addition, treating overflow by unsigned saturation.  */
    139 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    140 paddush (uint16x4_t s, uint16x4_t t)
    141 {
    142   return __builtin_loongson_paddush (s, t);
    143 }
    144 
    145 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    146 paddusb (uint8x8_t s, uint8x8_t t)
    147 {
    148   return __builtin_loongson_paddusb (s, t);
    149 }
    150 
    151 /* Logical AND NOT.  */
    152 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
    153 pandn_ud (uint64_t s, uint64_t t)
    154 {
    155   return __builtin_loongson_pandn_ud (s, t);
    156 }
    157 
    158 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    159 pandn_uw (uint32x2_t s, uint32x2_t t)
    160 {
    161   return __builtin_loongson_pandn_uw (s, t);
    162 }
    163 
    164 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    165 pandn_uh (uint16x4_t s, uint16x4_t t)
    166 {
    167   return __builtin_loongson_pandn_uh (s, t);
    168 }
    169 
    170 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    171 pandn_ub (uint8x8_t s, uint8x8_t t)
    172 {
    173   return __builtin_loongson_pandn_ub (s, t);
    174 }
    175 
    176 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
    177 pandn_sd (int64_t s, int64_t t)
    178 {
    179   return __builtin_loongson_pandn_sd (s, t);
    180 }
    181 
    182 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    183 pandn_sw (int32x2_t s, int32x2_t t)
    184 {
    185   return __builtin_loongson_pandn_sw (s, t);
    186 }
    187 
    188 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    189 pandn_sh (int16x4_t s, int16x4_t t)
    190 {
    191   return __builtin_loongson_pandn_sh (s, t);
    192 }
    193 
    194 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    195 pandn_sb (int8x8_t s, int8x8_t t)
    196 {
    197   return __builtin_loongson_pandn_sb (s, t);
    198 }
    199 
    200 /* Average.  */
    201 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    202 pavgh (uint16x4_t s, uint16x4_t t)
    203 {
    204   return __builtin_loongson_pavgh (s, t);
    205 }
    206 
    207 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    208 pavgb (uint8x8_t s, uint8x8_t t)
    209 {
    210   return __builtin_loongson_pavgb (s, t);
    211 }
    212 
    213 /* Equality test.  */
    214 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    215 pcmpeqw_u (uint32x2_t s, uint32x2_t t)
    216 {
    217   return __builtin_loongson_pcmpeqw_u (s, t);
    218 }
    219 
    220 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    221 pcmpeqh_u (uint16x4_t s, uint16x4_t t)
    222 {
    223   return __builtin_loongson_pcmpeqh_u (s, t);
    224 }
    225 
    226 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    227 pcmpeqb_u (uint8x8_t s, uint8x8_t t)
    228 {
    229   return __builtin_loongson_pcmpeqb_u (s, t);
    230 }
    231 
    232 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    233 pcmpeqw_s (int32x2_t s, int32x2_t t)
    234 {
    235   return __builtin_loongson_pcmpeqw_s (s, t);
    236 }
    237 
    238 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    239 pcmpeqh_s (int16x4_t s, int16x4_t t)
    240 {
    241   return __builtin_loongson_pcmpeqh_s (s, t);
    242 }
    243 
    244 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    245 pcmpeqb_s (int8x8_t s, int8x8_t t)
    246 {
    247   return __builtin_loongson_pcmpeqb_s (s, t);
    248 }
    249 
    250 /* Greater-than test.  */
    251 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    252 pcmpgtw_u (uint32x2_t s, uint32x2_t t)
    253 {
    254   return __builtin_loongson_pcmpgtw_u (s, t);
    255 }
    256 
    257 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    258 pcmpgth_u (uint16x4_t s, uint16x4_t t)
    259 {
    260   return __builtin_loongson_pcmpgth_u (s, t);
    261 }
    262 
    263 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    264 pcmpgtb_u (uint8x8_t s, uint8x8_t t)
    265 {
    266   return __builtin_loongson_pcmpgtb_u (s, t);
    267 }
    268 
    269 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    270 pcmpgtw_s (int32x2_t s, int32x2_t t)
    271 {
    272   return __builtin_loongson_pcmpgtw_s (s, t);
    273 }
    274 
    275 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    276 pcmpgth_s (int16x4_t s, int16x4_t t)
    277 {
    278   return __builtin_loongson_pcmpgth_s (s, t);
    279 }
    280 
    281 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    282 pcmpgtb_s (int8x8_t s, int8x8_t t)
    283 {
    284   return __builtin_loongson_pcmpgtb_s (s, t);
    285 }
    286 
    287 /* Extract halfword.  */
    288 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    289 pextrh_u (uint16x4_t s, int field /* 0--3 */)
    290 {
    291   return __builtin_loongson_pextrh_u (s, field);
    292 }
    293 
    294 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    295 pextrh_s (int16x4_t s, int field /* 0--3 */)
    296 {
    297   return __builtin_loongson_pextrh_s (s, field);
    298 }
    299 
    300 /* Insert halfword.  */
    301 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    302 pinsrh_0_u (uint16x4_t s, uint16x4_t t)
    303 {
    304   return __builtin_loongson_pinsrh_0_u (s, t);
    305 }
    306 
    307 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    308 pinsrh_1_u (uint16x4_t s, uint16x4_t t)
    309 {
    310   return __builtin_loongson_pinsrh_1_u (s, t);
    311 }
    312 
    313 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    314 pinsrh_2_u (uint16x4_t s, uint16x4_t t)
    315 {
    316   return __builtin_loongson_pinsrh_2_u (s, t);
    317 }
    318 
    319 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    320 pinsrh_3_u (uint16x4_t s, uint16x4_t t)
    321 {
    322   return __builtin_loongson_pinsrh_3_u (s, t);
    323 }
    324 
    325 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    326 pinsrh_0_s (int16x4_t s, int16x4_t t)
    327 {
    328   return __builtin_loongson_pinsrh_0_s (s, t);
    329 }
    330 
    331 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    332 pinsrh_1_s (int16x4_t s, int16x4_t t)
    333 {
    334   return __builtin_loongson_pinsrh_1_s (s, t);
    335 }
    336 
    337 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    338 pinsrh_2_s (int16x4_t s, int16x4_t t)
    339 {
    340   return __builtin_loongson_pinsrh_2_s (s, t);
    341 }
    342 
    343 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    344 pinsrh_3_s (int16x4_t s, int16x4_t t)
    345 {
    346   return __builtin_loongson_pinsrh_3_s (s, t);
    347 }
    348 
    349 /* Multiply and add.  */
    350 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    351 pmaddhw (int16x4_t s, int16x4_t t)
    352 {
    353   return __builtin_loongson_pmaddhw (s, t);
    354 }
    355 
    356 /* Maximum of signed halfwords.  */
    357 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    358 pmaxsh (int16x4_t s, int16x4_t t)
    359 {
    360   return __builtin_loongson_pmaxsh (s, t);
    361 }
    362 
    363 /* Maximum of unsigned bytes.  */
    364 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    365 pmaxub (uint8x8_t s, uint8x8_t t)
    366 {
    367   return __builtin_loongson_pmaxub (s, t);
    368 }
    369 
    370 /* Minimum of signed halfwords.  */
    371 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    372 pminsh (int16x4_t s, int16x4_t t)
    373 {
    374   return __builtin_loongson_pminsh (s, t);
    375 }
    376 
    377 /* Minimum of unsigned bytes.  */
    378 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    379 pminub (uint8x8_t s, uint8x8_t t)
    380 {
    381   return __builtin_loongson_pminub (s, t);
    382 }
    383 
    384 /* Move byte mask.  */
    385 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    386 pmovmskb_u (uint8x8_t s)
    387 {
    388   return __builtin_loongson_pmovmskb_u (s);
    389 }
    390 
    391 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    392 pmovmskb_s (int8x8_t s)
    393 {
    394   return __builtin_loongson_pmovmskb_s (s);
    395 }
    396 
    397 /* Multiply unsigned integers and store high result.  */
    398 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    399 pmulhuh (uint16x4_t s, uint16x4_t t)
    400 {
    401   return __builtin_loongson_pmulhuh (s, t);
    402 }
    403 
    404 /* Multiply signed integers and store high result.  */
    405 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    406 pmulhh (int16x4_t s, int16x4_t t)
    407 {
    408   return __builtin_loongson_pmulhh (s, t);
    409 }
    410 
    411 /* Multiply signed integers and store low result.  */
    412 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    413 pmullh (int16x4_t s, int16x4_t t)
    414 {
    415   return __builtin_loongson_pmullh (s, t);
    416 }
    417 
    418 /* Multiply unsigned word integers.  */
    419 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
    420 pmuluw (uint32x2_t s, uint32x2_t t)
    421 {
    422   return __builtin_loongson_pmuluw (s, t);
    423 }
    424 
    425 /* Absolute difference.  */
    426 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    427 pasubub (uint8x8_t s, uint8x8_t t)
    428 {
    429   return __builtin_loongson_pasubub (s, t);
    430 }
    431 
    432 /* Sum of unsigned byte integers.  */
    433 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    434 biadd (uint8x8_t s)
    435 {
    436   return __builtin_loongson_biadd (s);
    437 }
    438 
    439 /* Sum of absolute differences.
    440    Note that this intrinsic expands into two machine instructions:
    441    PASUBUB followed by BIADD.  */
    442 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    443 psadbh (uint8x8_t s, uint8x8_t t)
    444 {
    445   return __builtin_loongson_psadbh (s, t);
    446 }
    447 
    448 /* Shuffle halfwords.  */
    449 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    450 pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
    451 {
    452   return __builtin_loongson_pshufh_u (s, order);
    453 }
    454 
    455 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    456 pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
    457 {
    458   return __builtin_loongson_pshufh_s (s, order);
    459 }
    460 
    461 /* Shift left logical.  */
    462 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    463 psllh_u (uint16x4_t s, uint8_t amount)
    464 {
    465   return __builtin_loongson_psllh_u (s, amount);
    466 }
    467 
    468 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    469 psllh_s (int16x4_t s, uint8_t amount)
    470 {
    471   return __builtin_loongson_psllh_s (s, amount);
    472 }
    473 
    474 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    475 psllw_u (uint32x2_t s, uint8_t amount)
    476 {
    477   return __builtin_loongson_psllw_u (s, amount);
    478 }
    479 
    480 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    481 psllw_s (int32x2_t s, uint8_t amount)
    482 {
    483   return __builtin_loongson_psllw_s (s, amount);
    484 }
    485 
    486 /* Shift right logical.  */
    487 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    488 psrlh_u (uint16x4_t s, uint8_t amount)
    489 {
    490   return __builtin_loongson_psrlh_u (s, amount);
    491 }
    492 
    493 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    494 psrlh_s (int16x4_t s, uint8_t amount)
    495 {
    496   return __builtin_loongson_psrlh_s (s, amount);
    497 }
    498 
    499 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    500 psrlw_u (uint32x2_t s, uint8_t amount)
    501 {
    502   return __builtin_loongson_psrlw_u (s, amount);
    503 }
    504 
    505 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    506 psrlw_s (int32x2_t s, uint8_t amount)
    507 {
    508   return __builtin_loongson_psrlw_s (s, amount);
    509 }
    510 
    511 /* Shift right arithmetic.  */
    512 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    513 psrah_u (uint16x4_t s, uint8_t amount)
    514 {
    515   return __builtin_loongson_psrah_u (s, amount);
    516 }
    517 
    518 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    519 psrah_s (int16x4_t s, uint8_t amount)
    520 {
    521   return __builtin_loongson_psrah_s (s, amount);
    522 }
    523 
    524 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    525 psraw_u (uint32x2_t s, uint8_t amount)
    526 {
    527   return __builtin_loongson_psraw_u (s, amount);
    528 }
    529 
    530 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    531 psraw_s (int32x2_t s, uint8_t amount)
    532 {
    533   return __builtin_loongson_psraw_s (s, amount);
    534 }
    535 
    536 /* Vector subtraction, treating overflow by wraparound.  */
    537 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    538 psubw_u (uint32x2_t s, uint32x2_t t)
    539 {
    540   return __builtin_loongson_psubw_u (s, t);
    541 }
    542 
    543 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    544 psubh_u (uint16x4_t s, uint16x4_t t)
    545 {
    546   return __builtin_loongson_psubh_u (s, t);
    547 }
    548 
    549 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    550 psubb_u (uint8x8_t s, uint8x8_t t)
    551 {
    552   return __builtin_loongson_psubb_u (s, t);
    553 }
    554 
    555 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    556 psubw_s (int32x2_t s, int32x2_t t)
    557 {
    558   return __builtin_loongson_psubw_s (s, t);
    559 }
    560 
    561 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    562 psubh_s (int16x4_t s, int16x4_t t)
    563 {
    564   return __builtin_loongson_psubh_s (s, t);
    565 }
    566 
    567 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    568 psubb_s (int8x8_t s, int8x8_t t)
    569 {
    570   return __builtin_loongson_psubb_s (s, t);
    571 }
    572 
    573 /* Subtraction of doubleword integers, treating overflow by wraparound.  */
    574 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
    575 psubd_u (uint64_t s, uint64_t t)
    576 {
    577   return __builtin_loongson_psubd_u (s, t);
    578 }
    579 
    580 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
    581 psubd_s (int64_t s, int64_t t)
    582 {
    583   return __builtin_loongson_psubd_s (s, t);
    584 }
    585 
    586 /* Vector subtraction, treating overflow by signed saturation.  */
    587 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    588 psubsh (int16x4_t s, int16x4_t t)
    589 {
    590   return __builtin_loongson_psubsh (s, t);
    591 }
    592 
    593 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    594 psubsb (int8x8_t s, int8x8_t t)
    595 {
    596   return __builtin_loongson_psubsb (s, t);
    597 }
    598 
    599 /* Vector subtraction, treating overflow by unsigned saturation.  */
    600 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    601 psubush (uint16x4_t s, uint16x4_t t)
    602 {
    603   return __builtin_loongson_psubush (s, t);
    604 }
    605 
    606 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    607 psubusb (uint8x8_t s, uint8x8_t t)
    608 {
    609   return __builtin_loongson_psubusb (s, t);
    610 }
    611 
    612 /* Unpack high data.  */
    613 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    614 punpckhwd_u (uint32x2_t s, uint32x2_t t)
    615 {
    616   return __builtin_loongson_punpckhwd_u (s, t);
    617 }
    618 
    619 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    620 punpckhhw_u (uint16x4_t s, uint16x4_t t)
    621 {
    622   return __builtin_loongson_punpckhhw_u (s, t);
    623 }
    624 
    625 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    626 punpckhbh_u (uint8x8_t s, uint8x8_t t)
    627 {
    628   return __builtin_loongson_punpckhbh_u (s, t);
    629 }
    630 
    631 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    632 punpckhwd_s (int32x2_t s, int32x2_t t)
    633 {
    634   return __builtin_loongson_punpckhwd_s (s, t);
    635 }
    636 
    637 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    638 punpckhhw_s (int16x4_t s, int16x4_t t)
    639 {
    640   return __builtin_loongson_punpckhhw_s (s, t);
    641 }
    642 
    643 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    644 punpckhbh_s (int8x8_t s, int8x8_t t)
    645 {
    646   return __builtin_loongson_punpckhbh_s (s, t);
    647 }
    648 
    649 /* Unpack low data.  */
    650 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    651 punpcklwd_u (uint32x2_t s, uint32x2_t t)
    652 {
    653   return __builtin_loongson_punpcklwd_u (s, t);
    654 }
    655 
    656 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    657 punpcklhw_u (uint16x4_t s, uint16x4_t t)
    658 {
    659   return __builtin_loongson_punpcklhw_u (s, t);
    660 }
    661 
    662 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    663 punpcklbh_u (uint8x8_t s, uint8x8_t t)
    664 {
    665   return __builtin_loongson_punpcklbh_u (s, t);
    666 }
    667 
    668 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    669 punpcklwd_s (int32x2_t s, int32x2_t t)
    670 {
    671   return __builtin_loongson_punpcklwd_s (s, t);
    672 }
    673 
    674 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    675 punpcklhw_s (int16x4_t s, int16x4_t t)
    676 {
    677   return __builtin_loongson_punpcklhw_s (s, t);
    678 }
    679 
    680 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    681 punpcklbh_s (int8x8_t s, int8x8_t t)
    682 {
    683   return __builtin_loongson_punpcklbh_s (s, t);
    684 }
    685 
    686 #ifdef __cplusplus
    687 }
    688 #endif
    689 
    690 #endif
    691