Home | History | Annotate | Download | only in clang-include
      1 /*===---- mmintrin.h - MMX intrinsics --------------------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __MMINTRIN_H
     25 #define __MMINTRIN_H
     26 
     27 typedef long long __m64 __attribute__((__vector_size__(8)));
     28 
     29 typedef long long __v1di __attribute__((__vector_size__(8)));
     30 typedef int __v2si __attribute__((__vector_size__(8)));
     31 typedef short __v4hi __attribute__((__vector_size__(8)));
     32 typedef char __v8qi __attribute__((__vector_size__(8)));
     33 
     34 /* Define the default attributes for the functions in this file. */
     35 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
     36 
     37 /// \brief Clears the MMX state by setting the state of the x87 stack registers
     38 ///    to empty.
     39 ///
     40 /// \headerfile <x86intrin.h>
     41 ///
     42 /// This intrinsic corresponds to the \c EMMS instruction.
     43 ///
     44 static __inline__ void __DEFAULT_FN_ATTRS
     45 _mm_empty(void)
     46 {
     47     __builtin_ia32_emms();
     48 }
     49 
     50 /// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the
     51 ///    value of the 32-bit integer parameter and setting the upper 32 bits to 0.
     52 ///
     53 /// \headerfile <x86intrin.h>
     54 ///
     55 /// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
     56 ///
     57 /// \param __i
     58 ///    A 32-bit integer value.
     59 /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
     60 ///    parameter. The upper 32 bits are set to 0.
     61 static __inline__ __m64 __DEFAULT_FN_ATTRS
     62 _mm_cvtsi32_si64(int __i)
     63 {
     64     return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
     65 }
     66 
     67 /// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
     68 ///    signed integer.
     69 ///
     70 /// \headerfile <x86intrin.h>
     71 ///
     72 /// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
     73 ///
     74 /// \param __m
     75 ///    A 64-bit integer vector.
     76 /// \returns A 32-bit signed integer value containing the lower 32 bits of the
     77 ///    parameter.
     78 static __inline__ int __DEFAULT_FN_ATTRS
     79 _mm_cvtsi64_si32(__m64 __m)
     80 {
     81     return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
     82 }
     83 
     84 /// \brief Casts a 64-bit signed integer value into a 64-bit integer vector.
     85 ///
     86 /// \headerfile <x86intrin.h>
     87 ///
     88 /// This intrinsic corresponds to the \c VMOVQ / MOVD instruction.
     89 ///
     90 /// \param __i
     91 ///    A 64-bit signed integer.
     92 /// \returns A 64-bit integer vector containing the same bitwise pattern as the
     93 ///    parameter.
     94 static __inline__ __m64 __DEFAULT_FN_ATTRS
     95 _mm_cvtsi64_m64(long long __i)
     96 {
     97     return (__m64)__i;
     98 }
     99 
    100 /// \brief Casts a 64-bit integer vector into a 64-bit signed integer value.
    101 ///
    102 /// \headerfile <x86intrin.h>
    103 ///
    104 /// This intrinsic corresponds to the \c VMOVQ / MOVD instruction.
    105 ///
    106 /// \param __m
    107 ///    A 64-bit integer vector.
    108 /// \returns A 64-bit signed integer containing the same bitwise pattern as the
    109 ///    parameter.
    110 static __inline__ long long __DEFAULT_FN_ATTRS
    111 _mm_cvtm64_si64(__m64 __m)
    112 {
    113     return (long long)__m;
    114 }
    115 
    116 /// \brief Converts 16-bit signed integers from both 64-bit integer vector
    117 ///    parameters of [4 x i16] into 8-bit signed integer values, and constructs
    118 ///    a 64-bit integer vector of [8 x i8] as the result. Positive values
    119 ///    greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
    120 ///    are saturated to 0x80.
    121 ///
    122 /// \headerfile <x86intrin.h>
    123 ///
    124 /// This intrinsic corresponds to the \c PACKSSWB instruction.
    125 ///
    126 /// \param __m1
    127 ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
    128 ///    16-bit signed integer and is converted to an 8-bit signed integer with
    129 ///    saturation. Positive values greater than 0x7F are saturated to 0x7F.
    130 ///    Negative values less than 0x80 are saturated to 0x80. The converted
    131 ///    [4 x i8] values are written to the lower 32 bits of the result.
    132 /// \param __m2
    133 ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
    134 ///    16-bit signed integer and is converted to an 8-bit signed integer with
    135 ///    saturation. Positive values greater than 0x7F are saturated to 0x7F.
    136 ///    Negative values less than 0x80 are saturated to 0x80. The converted
    137 ///    [4 x i8] values are written to the upper 32 bits of the result.
    138 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
    139 ///    values.
    140 static __inline__ __m64 __DEFAULT_FN_ATTRS
    141 _mm_packs_pi16(__m64 __m1, __m64 __m2)
    142 {
    143     return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
    144 }
    145 
    146 /// \brief Converts 32-bit signed integers from both 64-bit integer vector
    147 ///    parameters of [2 x i32] into 16-bit signed integer values, and constructs
    148 ///    a 64-bit integer vector of [4 x i16] as the result. Positive values
    149 ///    greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
    150 ///    0x8000 are saturated to 0x8000.
    151 ///
    152 /// \headerfile <x86intrin.h>
    153 ///
    154 /// This intrinsic corresponds to the \c PACKSSDW instruction.
    155 ///
    156 /// \param __m1
    157 ///    A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
    158 ///    32-bit signed integer and is converted to a 16-bit signed integer with
    159 ///    saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
    160 ///    Negative values less than 0x8000 are saturated to 0x8000. The converted
    161 ///    [2 x i16] values are written to the lower 32 bits of the result.
    162 /// \param __m2
    163 ///    A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
    164 ///    32-bit signed integer and is converted to a 16-bit signed integer with
    165 ///    saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
    166 ///    Negative values less than 0x8000 are saturated to 0x8000. The converted
    167 ///    [2 x i16] values are written to the upper 32 bits of the result.
    168 /// \returns A 64-bit integer vector of [4 x i16] containing the converted
    169 ///    values.
    170 static __inline__ __m64 __DEFAULT_FN_ATTRS
    171 _mm_packs_pi32(__m64 __m1, __m64 __m2)
    172 {
    173     return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
    174 }
    175 
    176 /// \brief Converts 16-bit signed integers from both 64-bit integer vector
    177 ///    parameters of [4 x i16] into 8-bit unsigned integer values, and
    178 ///    constructs a 64-bit integer vector of [8 x i8] as the result. Values
    179 ///    greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
    180 ///    to 0.
    181 ///
    182 /// \headerfile <x86intrin.h>
    183 ///
    184 /// This intrinsic corresponds to the \c PACKUSWB instruction.
    185 ///
    186 /// \param __m1
    187 ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
    188 ///    16-bit signed integer and is converted to an 8-bit unsigned integer with
    189 ///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
    190 ///    than 0 are saturated to 0. The converted [4 x i8] values are written to
    191 ///    the lower 32 bits of the result.
    192 /// \param __m2
    193 ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
    194 ///    16-bit signed integer and is converted to an 8-bit unsigned integer with
    195 ///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
    196 ///    than 0 are saturated to 0. The converted [4 x i8] values are written to
    197 ///    the upper 32 bits of the result.
    198 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
    199 ///    values.
    200 static __inline__ __m64 __DEFAULT_FN_ATTRS
    201 _mm_packs_pu16(__m64 __m1, __m64 __m2)
    202 {
    203     return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
    204 }
    205 
    206 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
    207 ///    and interleaves them into a 64-bit integer vector of [8 x i8].
    208 ///
    209 /// \headerfile <x86intrin.h>
    210 ///
    211 /// This intrinsic corresponds to the \c PUNPCKHBW instruction.
    212 ///
    213 /// \param __m1
    214 ///    A 64-bit integer vector of [8 x i8].
    215 ///    Bits [39:32] are written to bits [7:0] of the result.
    216 ///    Bits [47:40] are written to bits [23:16] of the result.
    217 ///    Bits [55:48] are written to bits [39:32] of the result.
    218 ///    Bits [63:56] are written to bits [55:48] of the result.
    219 /// \param __m2
    220 ///    A 64-bit integer vector of [8 x i8].
    221 ///    Bits [39:32] are written to bits [15:8] of the result.
    222 ///    Bits [47:40] are written to bits [31:24] of the result.
    223 ///    Bits [55:48] are written to bits [47:40] of the result.
    224 ///    Bits [63:56] are written to bits [63:56] of the result.
    225 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
    226 ///    values.
    227 static __inline__ __m64 __DEFAULT_FN_ATTRS
    228 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
    229 {
    230     return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
    231 }
    232 
    233 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
    234 ///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
    235 ///
    236 /// \headerfile <x86intrin.h>
    237 ///
    238 /// This intrinsic corresponds to the \c PUNPCKHWD instruction.
    239 ///
    240 /// \param __m1
    241 ///    A 64-bit integer vector of [4 x i16].
    242 ///    Bits [47:32] are written to bits [15:0] of the result.
    243 ///    Bits [63:48] are written to bits [47:32] of the result.
    244 /// \param __m2
    245 ///    A 64-bit integer vector of [4 x i16].
    246 ///    Bits [47:32] are written to bits [31:16] of the result.
    247 ///    Bits [63:48] are written to bits [63:48] of the result.
    248 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
    249 ///    values.
    250 static __inline__ __m64 __DEFAULT_FN_ATTRS
    251 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
    252 {
    253     return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
    254 }
    255 
    256 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
    257 ///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
    258 ///
    259 /// \headerfile <x86intrin.h>
    260 ///
    261 /// This intrinsic corresponds to the \c PUNPCKHDQ instruction.
    262 ///
    263 /// \param __m1
    264 ///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
    265 ///    the lower 32 bits of the result.
    266 /// \param __m2
    267 ///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
    268 ///    the upper 32 bits of the result.
    269 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
    270 ///    values.
    271 static __inline__ __m64 __DEFAULT_FN_ATTRS
    272 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
    273 {
    274     return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
    275 }
    276 
    277 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
    278 ///    and interleaves them into a 64-bit integer vector of [8 x i8].
    279 ///
    280 /// \headerfile <x86intrin.h>
    281 ///
    282 /// This intrinsic corresponds to the \c PUNPCKLBW instruction.
    283 ///
    284 /// \param __m1
    285 ///    A 64-bit integer vector of [8 x i8].
    286 ///    Bits [7:0] are written to bits [7:0] of the result.
    287 ///    Bits [15:8] are written to bits [23:16] of the result.
    288 ///    Bits [23:16] are written to bits [39:32] of the result.
    289 ///    Bits [31:24] are written to bits [55:48] of the result.
    290 /// \param __m2
    291 ///    A 64-bit integer vector of [8 x i8].
    292 ///    Bits [7:0] are written to bits [15:8] of the result.
    293 ///    Bits [15:8] are written to bits [31:24] of the result.
    294 ///    Bits [23:16] are written to bits [47:40] of the result.
    295 ///    Bits [31:24] are written to bits [63:56] of the result.
    296 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
    297 ///    values.
    298 static __inline__ __m64 __DEFAULT_FN_ATTRS
    299 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
    300 {
    301     return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
    302 }
    303 
    304 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
    305 ///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
    306 ///
    307 /// \headerfile <x86intrin.h>
    308 ///
    309 /// This intrinsic corresponds to the \c PUNPCKLWD instruction.
    310 ///
    311 /// \param __m1
    312 ///    A 64-bit integer vector of [4 x i16].
    313 ///    Bits [15:0] are written to bits [15:0] of the result.
    314 ///    Bits [31:16] are written to bits [47:32] of the result.
    315 /// \param __m2
    316 ///    A 64-bit integer vector of [4 x i16].
    317 ///    Bits [15:0] are written to bits [31:16] of the result.
    318 ///    Bits [31:16] are written to bits [63:48] of the result.
    319 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
    320 ///    values.
    321 static __inline__ __m64 __DEFAULT_FN_ATTRS
    322 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
    323 {
    324     return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
    325 }
    326 
    327 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
    328 ///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
    329 ///
    330 /// \headerfile <x86intrin.h>
    331 ///
    332 /// This intrinsic corresponds to the \c PUNPCKLDQ instruction.
    333 ///
    334 /// \param __m1
    335 ///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
    336 ///    the lower 32 bits of the result.
    337 /// \param __m2
    338 ///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
    339 ///    the upper 32 bits of the result.
    340 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
    341 ///    values.
    342 static __inline__ __m64 __DEFAULT_FN_ATTRS
    343 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
    344 {
    345     return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
    346 }
    347 
    348 /// \brief Adds each 8-bit integer element of the first 64-bit integer vector
    349 ///    of [8 x i8] to the corresponding 8-bit integer element of the second
    350 ///    64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
    351 ///    packed into a 64-bit integer vector of [8 x i8].
    352 ///
    353 /// \headerfile <x86intrin.h>
    354 ///
    355 /// This intrinsic corresponds to the \c PADDB instruction.
    356 ///
    357 /// \param __m1
    358 ///    A 64-bit integer vector of [8 x i8].
    359 /// \param __m2
    360 ///    A 64-bit integer vector of [8 x i8].
    361 /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
    362 ///    parameters.
    363 static __inline__ __m64 __DEFAULT_FN_ATTRS
    364 _mm_add_pi8(__m64 __m1, __m64 __m2)
    365 {
    366     return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
    367 }
    368 
    369 /// \brief Adds each 16-bit integer element of the first 64-bit integer vector
    370 ///    of [4 x i16] to the corresponding 16-bit integer element of the second
    371 ///    64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
    372 ///    packed into a 64-bit integer vector of [4 x i16].
    373 ///
    374 /// \headerfile <x86intrin.h>
    375 ///
    376 /// This intrinsic corresponds to the \c PADDW instruction.
    377 ///
    378 /// \param __m1
    379 ///    A 64-bit integer vector of [4 x i16].
    380 /// \param __m2
    381 ///    A 64-bit integer vector of [4 x i16].
    382 /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
    383 ///    parameters.
    384 static __inline__ __m64 __DEFAULT_FN_ATTRS
    385 _mm_add_pi16(__m64 __m1, __m64 __m2)
    386 {
    387     return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
    388 }
    389 
    390 /// \brief Adds each 32-bit integer element of the first 64-bit integer vector
    391 ///    of [2 x i32] to the corresponding 32-bit integer element of the second
    392 ///    64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
    393 ///    packed into a 64-bit integer vector of [2 x i32].
    394 ///
    395 /// \headerfile <x86intrin.h>
    396 ///
    397 /// This intrinsic corresponds to the \c PADDD instruction.
    398 ///
    399 /// \param __m1
    400 ///    A 64-bit integer vector of [2 x i32].
    401 /// \param __m2
    402 ///    A 64-bit integer vector of [2 x i32].
    403 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
    404 ///    parameters.
    405 static __inline__ __m64 __DEFAULT_FN_ATTRS
    406 _mm_add_pi32(__m64 __m1, __m64 __m2)
    407 {
    408     return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
    409 }
    410 
    411 /// \brief Adds each 8-bit signed integer element of the first 64-bit integer
    412 ///    vector of [8 x i8] to the corresponding 8-bit signed integer element of
    413 ///    the second 64-bit integer vector of [8 x i8]. Positive sums greater than
    414 ///    0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
    415 ///    0x80. The results are packed into a 64-bit integer vector of [8 x i8].
    416 ///
    417 /// \headerfile <x86intrin.h>
    418 ///
    419 /// This intrinsic corresponds to the \c PADDSB instruction.
    420 ///
    421 /// \param __m1
    422 ///    A 64-bit integer vector of [8 x i8].
    423 /// \param __m2
    424 ///    A 64-bit integer vector of [8 x i8].
    425 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
    426 ///    of both parameters.
    427 static __inline__ __m64 __DEFAULT_FN_ATTRS
    428 _mm_adds_pi8(__m64 __m1, __m64 __m2)
    429 {
    430     return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
    431 }
    432 
    433 /// \brief Adds each 16-bit signed integer element of the first 64-bit integer
    434 ///    vector of [4 x i16] to the corresponding 16-bit signed integer element of
    435 ///    the second 64-bit integer vector of [4 x i16]. Positive sums greater than
    436 ///    0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
    437 ///    saturated to 0x8000. The results are packed into a 64-bit integer vector
    438 ///    of [4 x i16].
    439 ///
    440 /// \headerfile <x86intrin.h>
    441 ///
    442 /// This intrinsic corresponds to the \c PADDSW instruction.
    443 ///
    444 /// \param __m1
    445 ///    A 64-bit integer vector of [4 x i16].
    446 /// \param __m2
    447 ///    A 64-bit integer vector of [4 x i16].
    448 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
    449 ///    of both parameters.
    450 static __inline__ __m64 __DEFAULT_FN_ATTRS
    451 _mm_adds_pi16(__m64 __m1, __m64 __m2)
    452 {
    453     return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
    454 }
    455 
    456 /// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer
    457 ///    vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
    458 ///    the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
    459 ///    saturated to 0xFF. The results are packed into a 64-bit integer vector of
    460 ///    [8 x i8].
    461 ///
    462 /// \headerfile <x86intrin.h>
    463 ///
    464 /// This intrinsic corresponds to the \c PADDUSB instruction.
    465 ///
    466 /// \param __m1
    467 ///    A 64-bit integer vector of [8 x i8].
    468 /// \param __m2
    469 ///    A 64-bit integer vector of [8 x i8].
    470 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
    471 ///    unsigned sums of both parameters.
    472 static __inline__ __m64 __DEFAULT_FN_ATTRS
    473 _mm_adds_pu8(__m64 __m1, __m64 __m2)
    474 {
    475     return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
    476 }
    477 
    478 /// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer
    479 ///    vector of [4 x i16] to the corresponding 16-bit unsigned integer element
    480 ///    of the second 64-bit integer vector of [4 x i16]. Sums greater than
    481 ///    0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
    482 ///    integer vector of [4 x i16].
    483 ///
    484 /// \headerfile <x86intrin.h>
    485 ///
    486 /// This intrinsic corresponds to the \c PADDUSW instruction.
    487 ///
    488 /// \param __m1
    489 ///    A 64-bit integer vector of [4 x i16].
    490 /// \param __m2
    491 ///    A 64-bit integer vector of [4 x i16].
    492 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
    493 ///    unsigned sums of both parameters.
    494 static __inline__ __m64 __DEFAULT_FN_ATTRS
    495 _mm_adds_pu16(__m64 __m1, __m64 __m2)
    496 {
    497     return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
    498 }
    499 
    500 /// \brief Subtracts each 8-bit integer element of the second 64-bit integer
    501 ///    vector of [8 x i8] from the corresponding 8-bit integer element of the
    502 ///    first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
    503 ///    are packed into a 64-bit integer vector of [8 x i8].
    504 ///
    505 /// \headerfile <x86intrin.h>
    506 ///
    507 /// This intrinsic corresponds to the \c PSUBB instruction.
    508 ///
    509 /// \param __m1
    510 ///    A 64-bit integer vector of [8 x i8] containing the minuends.
    511 /// \param __m2
    512 ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
    513 /// \returns A 64-bit integer vector of [8 x i8] containing the differences of
    514 ///    both parameters.
    515 static __inline__ __m64 __DEFAULT_FN_ATTRS
    516 _mm_sub_pi8(__m64 __m1, __m64 __m2)
    517 {
    518     return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
    519 }
    520 
    521 /// \brief Subtracts each 16-bit integer element of the second 64-bit integer
    522 ///    vector of [4 x i16] from the corresponding 16-bit integer element of the
    523 ///    first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
    524 ///    results are packed into a 64-bit integer vector of [4 x i16].
    525 ///
    526 /// \headerfile <x86intrin.h>
    527 ///
    528 /// This intrinsic corresponds to the \c PSUBW instruction.
    529 ///
    530 /// \param __m1
    531 ///    A 64-bit integer vector of [4 x i16] containing the minuends.
    532 /// \param __m2
    533 ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
    534 /// \returns A 64-bit integer vector of [4 x i16] containing the differences of
    535 ///    both parameters.
    536 static __inline__ __m64 __DEFAULT_FN_ATTRS
    537 _mm_sub_pi16(__m64 __m1, __m64 __m2)
    538 {
    539     return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
    540 }
    541 
    542 /// \brief Subtracts each 32-bit integer element of the second 64-bit integer
    543 ///    vector of [2 x i32] from the corresponding 32-bit integer element of the
    544 ///    first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
    545 ///    results are packed into a 64-bit integer vector of [2 x i32].
    546 ///
    547 /// \headerfile <x86intrin.h>
    548 ///
    549 /// This intrinsic corresponds to the \c PSUBD instruction.
    550 ///
    551 /// \param __m1
    552 ///    A 64-bit integer vector of [2 x i32] containing the minuends.
    553 /// \param __m2
    554 ///    A 64-bit integer vector of [2 x i32] containing the subtrahends.
    555 /// \returns A 64-bit integer vector of [2 x i32] containing the differences of
    556 ///    both parameters.
    557 static __inline__ __m64 __DEFAULT_FN_ATTRS
    558 _mm_sub_pi32(__m64 __m1, __m64 __m2)
    559 {
    560     return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
    561 }
    562 
    563 /// \brief Subtracts each 8-bit signed integer element of the second 64-bit
    564 ///    integer vector of [8 x i8] from the corresponding 8-bit signed integer
    565 ///    element of the first 64-bit integer vector of [8 x i8]. Positive results
    566 ///    greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
    567 ///    are saturated to 0x80. The results are packed into a 64-bit integer
    568 ///    vector of [8 x i8].
    569 ///
    570 /// \headerfile <x86intrin.h>
    571 ///
    572 /// This intrinsic corresponds to the \c PSUBSB instruction.
    573 ///
    574 /// \param __m1
    575 ///    A 64-bit integer vector of [8 x i8] containing the minuends.
    576 /// \param __m2
    577 ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
    578 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
    579 ///    differences of both parameters.
    580 static __inline__ __m64 __DEFAULT_FN_ATTRS
    581 _mm_subs_pi8(__m64 __m1, __m64 __m2)
    582 {
    583     return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
    584 }
    585 
    586 /// \brief Subtracts each 16-bit signed integer element of the second 64-bit
    587 ///    integer vector of [4 x i16] from the corresponding 16-bit signed integer
    588 ///    element of the first 64-bit integer vector of [4 x i16]. Positive results
    589 ///    greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
    590 ///    0x8000 are saturated to 0x8000. The results are packed into a 64-bit
    591 ///    integer vector of [4 x i16].
    592 ///
    593 /// \headerfile <x86intrin.h>
    594 ///
    595 /// This intrinsic corresponds to the \c PSUBSW instruction.
    596 ///
    597 /// \param __m1
    598 ///    A 64-bit integer vector of [4 x i16] containing the minuends.
    599 /// \param __m2
    600 ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
    601 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
    602 ///    differences of both parameters.
    603 static __inline__ __m64 __DEFAULT_FN_ATTRS
    604 _mm_subs_pi16(__m64 __m1, __m64 __m2)
    605 {
    606     return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
    607 }
    608 
    609 /// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
    610 ///    integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
    611 ///    element of the first 64-bit integer vector of [8 x i8]. If an element of
    612 ///    the first vector is less than the corresponding element of the second
    613 ///    vector, the result is saturated to 0. The results are packed into a
    614 ///    64-bit integer vector of [8 x i8].
    615 ///
    616 /// \headerfile <x86intrin.h>
    617 ///
    618 /// This intrinsic corresponds to the \c PSUBUSB instruction.
    619 ///
    620 /// \param __m1
    621 ///    A 64-bit integer vector of [8 x i8] containing the minuends.
    622 /// \param __m2
    623 ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
    624 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
    625 ///    differences of both parameters.
    626 static __inline__ __m64 __DEFAULT_FN_ATTRS
    627 _mm_subs_pu8(__m64 __m1, __m64 __m2)
    628 {
    629     return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
    630 }
    631 
    632 /// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
    633 ///    integer vector of [4 x i16] from the corresponding 16-bit unsigned
    634 ///    integer element of the first 64-bit integer vector of [4 x i16]. If an
    635 ///    element of the first vector is less than the corresponding element of the
    636 ///    second vector, the result is saturated to 0. The results are packed into
    637 ///    a 64-bit integer vector of [4 x i16].
    638 ///
    639 /// \headerfile <x86intrin.h>
    640 ///
    641 /// This intrinsic corresponds to the \c PSUBUSW instruction.
    642 ///
    643 /// \param __m1
    644 ///    A 64-bit integer vector of [4 x i16] containing the minuends.
    645 /// \param __m2
    646 ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
    647 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
    648 ///    differences of both parameters.
    649 static __inline__ __m64 __DEFAULT_FN_ATTRS
    650 _mm_subs_pu16(__m64 __m1, __m64 __m2)
    651 {
    652     return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
    653 }
    654 
    655 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
    656 ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
    657 ///    element of the second 64-bit integer vector of [4 x i16] and get four
    658 ///    32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
    659 ///    The lower 32 bits of these two sums are packed into a 64-bit integer
    660 ///    vector of [2 x i32]. For example, bits [15:0] of both parameters are
    661 ///    multiplied, bits [31:16] of both parameters are multiplied, and the sum
    662 ///    of both results is written to bits [31:0] of the result.
    663 ///
    664 /// \headerfile <x86intrin.h>
    665 ///
    666 /// This intrinsic corresponds to the \c PMADDWD instruction.
    667 ///
    668 /// \param __m1
    669 ///    A 64-bit integer vector of [4 x i16].
    670 /// \param __m2
    671 ///    A 64-bit integer vector of [4 x i16].
    672 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of
    673 ///    products of both parameters.
    674 static __inline__ __m64 __DEFAULT_FN_ATTRS
    675 _mm_madd_pi16(__m64 __m1, __m64 __m2)
    676 {
    677     return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
    678 }
    679 
    680 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
    681 ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
    682 ///    element of the second 64-bit integer vector of [4 x i16]. Packs the upper
    683 ///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
    684 ///
    685 /// \headerfile <x86intrin.h>
    686 ///
    687 /// This intrinsic corresponds to the \c PMULHW instruction.
    688 ///
    689 /// \param __m1
    690 ///    A 64-bit integer vector of [4 x i16].
    691 /// \param __m2
    692 ///    A 64-bit integer vector of [4 x i16].
    693 /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
    694 ///    of the products of both parameters.
    695 static __inline__ __m64 __DEFAULT_FN_ATTRS
    696 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
    697 {
    698     return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
    699 }
    700 
    701 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
    702 ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
    703 ///    element of the second 64-bit integer vector of [4 x i16]. Packs the lower
    704 ///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
    705 ///
    706 /// \headerfile <x86intrin.h>
    707 ///
    708 /// This intrinsic corresponds to the \c PMULLW instruction.
    709 ///
    710 /// \param __m1
    711 ///    A 64-bit integer vector of [4 x i16].
    712 /// \param __m2
    713 ///    A 64-bit integer vector of [4 x i16].
    714 /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
    715 ///    of the products of both parameters.
    716 static __inline__ __m64 __DEFAULT_FN_ATTRS
    717 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
    718 {
    719     return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
    720 }
    721 
    722 /// \brief Left-shifts each 16-bit signed integer element of the first
    723 ///    parameter, which is a 64-bit integer vector of [4 x i16], by the number
    724 ///    of bits specified by the second parameter, which is a 64-bit integer. The
    725 ///    lower 16 bits of the results are packed into a 64-bit integer vector of
    726 ///    [4 x i16].
    727 ///
    728 /// \headerfile <x86intrin.h>
    729 ///
    730 /// This intrinsic corresponds to the \c PSLLW instruction.
    731 ///
    732 /// \param __m
    733 ///    A 64-bit integer vector of [4 x i16].
    734 /// \param __count
    735 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
    736 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
    737 ///    values. If __count is greater or equal to 16, the result is set to all 0.
    738 static __inline__ __m64 __DEFAULT_FN_ATTRS
    739 _mm_sll_pi16(__m64 __m, __m64 __count)
    740 {
    741     return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
    742 }
    743 
    744 /// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer
    745 ///    vector of [4 x i16] by the number of bits specified by a 32-bit integer.
    746 ///    The lower 16 bits of the results are packed into a 64-bit integer vector
    747 ///    of [4 x i16].
    748 ///
    749 /// \headerfile <x86intrin.h>
    750 ///
    751 /// This intrinsic corresponds to the \c PSLLW instruction.
    752 ///
    753 /// \param __m
    754 ///    A 64-bit integer vector of [4 x i16].
    755 /// \param __count
    756 ///    A 32-bit integer value.
    757 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
    758 ///    values. If __count is greater or equal to 16, the result is set to all 0.
    759 static __inline__ __m64 __DEFAULT_FN_ATTRS
    760 _mm_slli_pi16(__m64 __m, int __count)
    761 {
    762     return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
    763 }
    764 
    765 /// \brief Left-shifts each 32-bit signed integer element of the first
    766 ///    parameter, which is a 64-bit integer vector of [2 x i32], by the number
    767 ///    of bits specified by the second parameter, which is a 64-bit integer. The
    768 ///    lower 32 bits of the results are packed into a 64-bit integer vector of
    769 ///    [2 x i32].
    770 ///
    771 /// \headerfile <x86intrin.h>
    772 ///
    773 /// This intrinsic corresponds to the \c PSLLD instruction.
    774 ///
    775 /// \param __m
    776 ///    A 64-bit integer vector of [2 x i32].
    777 /// \param __count
    778 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
    779 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
    780 ///    values. If __count is greater or equal to 32, the result is set to all 0.
    781 static __inline__ __m64 __DEFAULT_FN_ATTRS
    782 _mm_sll_pi32(__m64 __m, __m64 __count)
    783 {
    784     return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
    785 }
    786 
    787 /// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer
    788 ///    vector of [2 x i32] by the number of bits specified by a 32-bit integer.
    789 ///    The lower 32 bits of the results are packed into a 64-bit integer vector
    790 ///    of [2 x i32].
    791 ///
    792 /// \headerfile <x86intrin.h>
    793 ///
    794 /// This intrinsic corresponds to the \c PSLLD instruction.
    795 ///
    796 /// \param __m
    797 ///    A 64-bit integer vector of [2 x i32].
    798 /// \param __count
    799 ///    A 32-bit integer value.
    800 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
    801 ///    values. If __count is greater or equal to 32, the result is set to all 0.
    802 static __inline__ __m64 __DEFAULT_FN_ATTRS
    803 _mm_slli_pi32(__m64 __m, int __count)
    804 {
    805     return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
    806 }
    807 
    808 /// \brief Left-shifts the first 64-bit integer parameter by the number of bits
    809 ///    specified by the second 64-bit integer parameter. The lower 64 bits of
    810 ///    result are returned.
    811 ///
    812 /// \headerfile <x86intrin.h>
    813 ///
    814 /// This intrinsic corresponds to the \c PSLLQ instruction.
    815 ///
    816 /// \param __m
    817 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
    818 /// \param __count
    819 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
    820 /// \returns A 64-bit integer vector containing the left-shifted value. If
    821 ///     __count is greater or equal to 64, the result is set to 0.
    822 static __inline__ __m64 __DEFAULT_FN_ATTRS
    823 _mm_sll_si64(__m64 __m, __m64 __count)
    824 {
    825     return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
    826 }
    827 
    828 /// \brief Left-shifts the first parameter, which is a 64-bit integer, by the
    829 ///    number of bits specified by the second parameter, which is a 32-bit
    830 ///    integer. The lower 64 bits of result are returned.
    831 ///
    832 /// \headerfile <x86intrin.h>
    833 ///
    834 /// This intrinsic corresponds to the \c PSLLQ instruction.
    835 ///
    836 /// \param __m
    837 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
    838 /// \param __count
    839 ///    A 32-bit integer value.
    840 /// \returns A 64-bit integer vector containing the left-shifted value. If
    841 ///     __count is greater or equal to 64, the result is set to 0.
    842 static __inline__ __m64 __DEFAULT_FN_ATTRS
    843 _mm_slli_si64(__m64 __m, int __count)
    844 {
    845     return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
    846 }
    847 
    848 /// \brief Right-shifts each 16-bit integer element of the first parameter,
    849 ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
    850 ///    specified by the second parameter, which is a 64-bit integer. High-order
    851 ///    bits are filled with the sign bit of the initial value of each 16-bit
    852 ///    element. The 16-bit results are packed into a 64-bit integer vector of
    853 ///    [4 x i16].
    854 ///
    855 /// \headerfile <x86intrin.h>
    856 ///
    857 /// This intrinsic corresponds to the \c PSRAW instruction.
    858 ///
    859 /// \param __m
    860 ///    A 64-bit integer vector of [4 x i16].
    861 /// \param __count
    862 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
    863 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
    864 ///    values.
    865 static __inline__ __m64 __DEFAULT_FN_ATTRS
    866 _mm_sra_pi16(__m64 __m, __m64 __count)
    867 {
    868     return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
    869 }
    870 
    871 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
    872 ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
    873 ///    High-order bits are filled with the sign bit of the initial value of each
    874 ///    16-bit element. The 16-bit results are packed into a 64-bit integer
    875 ///    vector of [4 x i16].
    876 ///
    877 /// \headerfile <x86intrin.h>
    878 ///
    879 /// This intrinsic corresponds to the \c PSRAW instruction.
    880 ///
    881 /// \param __m
    882 ///    A 64-bit integer vector of [4 x i16].
    883 /// \param __count
    884 ///    A 32-bit integer value.
    885 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
    886 ///    values.
    887 static __inline__ __m64 __DEFAULT_FN_ATTRS
    888 _mm_srai_pi16(__m64 __m, int __count)
    889 {
    890     return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
    891 }
    892 
    893 /// \brief Right-shifts each 32-bit integer element of the first parameter,
    894 ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
    895 ///    specified by the second parameter, which is a 64-bit integer. High-order
    896 ///    bits are filled with the sign bit of the initial value of each 32-bit
    897 ///    element. The 32-bit results are packed into a 64-bit integer vector of
    898 ///    [2 x i32].
    899 ///
    900 /// \headerfile <x86intrin.h>
    901 ///
    902 /// This intrinsic corresponds to the \c PSRAD instruction.
    903 ///
    904 /// \param __m
    905 ///    A 64-bit integer vector of [2 x i32].
    906 /// \param __count
    907 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
    908 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
    909 ///    values.
    910 static __inline__ __m64 __DEFAULT_FN_ATTRS
    911 _mm_sra_pi32(__m64 __m, __m64 __count)
    912 {
    913     return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
    914 }
    915 
    916 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
    917 ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
    918 ///    High-order bits are filled with the sign bit of the initial value of each
    919 ///    32-bit element. The 32-bit results are packed into a 64-bit integer
    920 ///    vector of [2 x i32].
    921 ///
    922 /// \headerfile <x86intrin.h>
    923 ///
    924 /// This intrinsic corresponds to the \c PSRAD instruction.
    925 ///
    926 /// \param __m
    927 ///    A 64-bit integer vector of [2 x i32].
    928 /// \param __count
    929 ///    A 32-bit integer value.
    930 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
    931 ///    values.
    932 static __inline__ __m64 __DEFAULT_FN_ATTRS
    933 _mm_srai_pi32(__m64 __m, int __count)
    934 {
    935     return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
    936 }
    937 
    938 /// \brief Right-shifts each 16-bit integer element of the first parameter,
    939 ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
    940 ///    specified by the second parameter, which is a 64-bit integer. High-order
    941 ///    bits are cleared. The 16-bit results are packed into a 64-bit integer
    942 ///    vector of [4 x i16].
    943 ///
    944 /// \headerfile <x86intrin.h>
    945 ///
    946 /// This intrinsic corresponds to the \c PSRLW instruction.
    947 ///
    948 /// \param __m
    949 ///    A 64-bit integer vector of [4 x i16].
    950 /// \param __count
    951 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
    952 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
    953 ///    values.
    954 static __inline__ __m64 __DEFAULT_FN_ATTRS
    955 _mm_srl_pi16(__m64 __m, __m64 __count)
    956 {
    957     return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
    958 }
    959 
    960 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
    961 ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
    962 ///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
    963 ///    integer vector of [4 x i16].
    964 ///
    965 /// \headerfile <x86intrin.h>
    966 ///
    967 /// This intrinsic corresponds to the \c PSRLW instruction.
    968 ///
    969 /// \param __m
    970 ///    A 64-bit integer vector of [4 x i16].
    971 /// \param __count
    972 ///    A 32-bit integer value.
    973 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
    974 ///    values.
    975 static __inline__ __m64 __DEFAULT_FN_ATTRS
    976 _mm_srli_pi16(__m64 __m, int __count)
    977 {
    978     return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
    979 }
    980 
    981 /// \brief Right-shifts each 32-bit integer element of the first parameter,
    982 ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
    983 ///    specified by the second parameter, which is a 64-bit integer. High-order
    984 ///    bits are cleared. The 32-bit results are packed into a 64-bit integer
    985 ///    vector of [2 x i32].
    986 ///
    987 /// \headerfile <x86intrin.h>
    988 ///
    989 /// This intrinsic corresponds to the \c PSRLD instruction.
    990 ///
    991 /// \param __m
    992 ///    A 64-bit integer vector of [2 x i32].
    993 /// \param __count
    994 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
    995 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
    996 ///    values.
    997 static __inline__ __m64 __DEFAULT_FN_ATTRS
    998 _mm_srl_pi32(__m64 __m, __m64 __count)
    999 {
   1000     return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
   1001 }
   1002 
   1003 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
   1004 ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
   1005 ///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
   1006 ///    integer vector of [2 x i32].
   1007 ///
   1008 /// \headerfile <x86intrin.h>
   1009 ///
   1010 /// This intrinsic corresponds to the \c PSRLD instruction.
   1011 ///
   1012 /// \param __m
   1013 ///    A 64-bit integer vector of [2 x i32].
   1014 /// \param __count
   1015 ///    A 32-bit integer value.
   1016 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
   1017 ///    values.
   1018 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1019 _mm_srli_pi32(__m64 __m, int __count)
   1020 {
   1021     return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
   1022 }
   1023 
   1024 /// \brief Right-shifts the first 64-bit integer parameter by the number of bits
   1025 ///    specified by the second 64-bit integer parameter. High-order bits are
   1026 ///    cleared.
   1027 ///
   1028 /// \headerfile <x86intrin.h>
   1029 ///
   1030 /// This intrinsic corresponds to the \c PSRLQ instruction.
   1031 ///
   1032 /// \param __m
   1033 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
   1034 /// \param __count
   1035 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
   1036 /// \returns A 64-bit integer vector containing the right-shifted value.
   1037 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1038 _mm_srl_si64(__m64 __m, __m64 __count)
   1039 {
   1040     return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
   1041 }
   1042 
   1043 /// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
   1044 ///    number of bits specified by the second parameter, which is a 32-bit
   1045 ///    integer. High-order bits are cleared.
   1046 ///
   1047 /// \headerfile <x86intrin.h>
   1048 ///
   1049 /// This intrinsic corresponds to the \c PSRLQ instruction.
   1050 ///
   1051 /// \param __m
   1052 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
   1053 /// \param __count
   1054 ///    A 32-bit integer value.
   1055 /// \returns A 64-bit integer vector containing the right-shifted value.
   1056 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1057 _mm_srli_si64(__m64 __m, int __count)
   1058 {
   1059     return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
   1060 }
   1061 
   1062 /// \brief Performs a bitwise AND of two 64-bit integer vectors.
   1063 ///
   1064 /// \headerfile <x86intrin.h>
   1065 ///
   1066 /// This intrinsic corresponds to the \c PAND instruction.
   1067 ///
   1068 /// \param __m1
   1069 ///    A 64-bit integer vector.
   1070 /// \param __m2
   1071 ///    A 64-bit integer vector.
   1072 /// \returns A 64-bit integer vector containing the bitwise AND of both
   1073 ///    parameters.
   1074 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1075 _mm_and_si64(__m64 __m1, __m64 __m2)
   1076 {
   1077     return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
   1078 }
   1079 
   1080 /// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then
   1081 ///    performs a bitwise AND of the intermediate result and the second 64-bit
   1082 ///    integer vector.
   1083 ///
   1084 /// \headerfile <x86intrin.h>
   1085 ///
   1086 /// This intrinsic corresponds to the \c PANDN instruction.
   1087 ///
   1088 /// \param __m1
   1089 ///    A 64-bit integer vector. The one's complement of this parameter is used
   1090 ///    in the bitwise AND.
   1091 /// \param __m2
   1092 ///    A 64-bit integer vector.
   1093 /// \returns A 64-bit integer vector containing the bitwise AND of the second
   1094 ///    parameter and the one's complement of the first parameter.
   1095 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1096 _mm_andnot_si64(__m64 __m1, __m64 __m2)
   1097 {
   1098     return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
   1099 }
   1100 
   1101 /// \brief Performs a bitwise OR of two 64-bit integer vectors.
   1102 ///
   1103 /// \headerfile <x86intrin.h>
   1104 ///
   1105 /// This intrinsic corresponds to the \c POR instruction.
   1106 ///
   1107 /// \param __m1
   1108 ///    A 64-bit integer vector.
   1109 /// \param __m2
   1110 ///    A 64-bit integer vector.
   1111 /// \returns A 64-bit integer vector containing the bitwise OR of both
   1112 ///    parameters.
   1113 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1114 _mm_or_si64(__m64 __m1, __m64 __m2)
   1115 {
   1116     return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
   1117 }
   1118 
   1119 /// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors.
   1120 ///
   1121 /// \headerfile <x86intrin.h>
   1122 ///
   1123 /// This intrinsic corresponds to the \c PXOR instruction.
   1124 ///
   1125 /// \param __m1
   1126 ///    A 64-bit integer vector.
   1127 /// \param __m2
   1128 ///    A 64-bit integer vector.
   1129 /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
   1130 ///    parameters.
   1131 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1132 _mm_xor_si64(__m64 __m1, __m64 __m2)
   1133 {
   1134     return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
   1135 }
   1136 
   1137 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
   1138 ///    [8 x i8] to determine if the element of the first vector is equal to the
   1139 ///    corresponding element of the second vector. The comparison yields 0 for
   1140 ///    false, 0xFF for true.
   1141 ///
   1142 /// \headerfile <x86intrin.h>
   1143 ///
   1144 /// This intrinsic corresponds to the \c PCMPEQB instruction.
   1145 ///
   1146 /// \param __m1
   1147 ///    A 64-bit integer vector of [8 x i8].
   1148 /// \param __m2
   1149 ///    A 64-bit integer vector of [8 x i8].
   1150 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
   1151 ///    results.
   1152 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1153 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
   1154 {
   1155     return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
   1156 }
   1157 
   1158 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
   1159 ///    [4 x i16] to determine if the element of the first vector is equal to the
   1160 ///    corresponding element of the second vector. The comparison yields 0 for
   1161 ///    false, 0xFFFF for true.
   1162 ///
   1163 /// \headerfile <x86intrin.h>
   1164 ///
   1165 /// This intrinsic corresponds to the \c PCMPEQW instruction.
   1166 ///
   1167 /// \param __m1
   1168 ///    A 64-bit integer vector of [4 x i16].
   1169 /// \param __m2
   1170 ///    A 64-bit integer vector of [4 x i16].
   1171 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
   1172 ///    results.
   1173 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1174 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
   1175 {
   1176     return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
   1177 }
   1178 
   1179 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
   1180 ///    [2 x i32] to determine if the element of the first vector is equal to the
   1181 ///    corresponding element of the second vector. The comparison yields 0 for
   1182 ///    false, 0xFFFFFFFF for true.
   1183 ///
   1184 /// \headerfile <x86intrin.h>
   1185 ///
   1186 /// This intrinsic corresponds to the \c PCMPEQD instruction.
   1187 ///
   1188 /// \param __m1
   1189 ///    A 64-bit integer vector of [2 x i32].
   1190 /// \param __m2
   1191 ///    A 64-bit integer vector of [2 x i32].
   1192 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
   1193 ///    results.
   1194 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1195 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
   1196 {
   1197     return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
   1198 }
   1199 
   1200 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
   1201 ///    [8 x i8] to determine if the element of the first vector is greater than
   1202 ///    the corresponding element of the second vector. The comparison yields 0
   1203 ///    for false, 0xFF for true.
   1204 ///
   1205 /// \headerfile <x86intrin.h>
   1206 ///
   1207 /// This intrinsic corresponds to the \c PCMPGTB instruction.
   1208 ///
   1209 /// \param __m1
   1210 ///    A 64-bit integer vector of [8 x i8].
   1211 /// \param __m2
   1212 ///    A 64-bit integer vector of [8 x i8].
   1213 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
   1214 ///    results.
   1215 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1216 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
   1217 {
   1218     return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
   1219 }
   1220 
   1221 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
   1222 ///    [4 x i16] to determine if the element of the first vector is greater than
   1223 ///    the corresponding element of the second vector. The comparison yields 0
   1224 ///    for false, 0xFFFF for true.
   1225 ///
   1226 /// \headerfile <x86intrin.h>
   1227 ///
   1228 /// This intrinsic corresponds to the \c PCMPGTW instruction.
   1229 ///
   1230 /// \param __m1
   1231 ///    A 64-bit integer vector of [4 x i16].
   1232 /// \param __m2
   1233 ///    A 64-bit integer vector of [4 x i16].
   1234 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
   1235 ///    results.
   1236 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1237 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
   1238 {
   1239     return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
   1240 }
   1241 
   1242 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
   1243 ///    [2 x i32] to determine if the element of the first vector is greater than
   1244 ///    the corresponding element of the second vector. The comparison yields 0
   1245 ///    for false, 0xFFFFFFFF for true.
   1246 ///
   1247 /// \headerfile <x86intrin.h>
   1248 ///
   1249 /// This intrinsic corresponds to the \c PCMPGTD instruction.
   1250 ///
   1251 /// \param __m1
   1252 ///    A 64-bit integer vector of [2 x i32].
   1253 /// \param __m2
   1254 ///    A 64-bit integer vector of [2 x i32].
   1255 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
   1256 ///    results.
   1257 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1258 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
   1259 {
   1260     return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
   1261 }
   1262 
   1263 /// \brief Constructs a 64-bit integer vector initialized to zero.
   1264 ///
   1265 /// \headerfile <x86intrin.h>
   1266 ///
   1267 /// This intrinsic corresponds to the the \c VXORPS / XORPS instruction.
   1268 ///
   1269 /// \returns An initialized 64-bit integer vector with all elements set to zero.
   1270 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1271 _mm_setzero_si64(void)
   1272 {
   1273     return (__m64){ 0LL };
   1274 }
   1275 
   1276 /// \brief Constructs a 64-bit integer vector initialized with the specified
   1277 ///    32-bit integer values.
   1278 ///
   1279 /// \headerfile <x86intrin.h>
   1280 ///
   1281 /// This intrinsic is a utility function and does not correspond to a specific
   1282 ///    instruction.
   1283 ///
   1284 /// \param __i1
   1285 ///    A 32-bit integer value used to initialize the upper 32 bits of the
   1286 ///    result.
   1287 /// \param __i0
   1288 ///    A 32-bit integer value used to initialize the lower 32 bits of the
   1289 ///    result.
   1290 /// \returns An initialized 64-bit integer vector.
   1291 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1292 _mm_set_pi32(int __i1, int __i0)
   1293 {
   1294     return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
   1295 }
   1296 
   1297 /// \brief Constructs a 64-bit integer vector initialized with the specified
   1298 ///    16-bit integer values.
   1299 ///
   1300 /// \headerfile <x86intrin.h>
   1301 ///
   1302 /// This intrinsic is a utility function and does not correspond to a specific
   1303 ///    instruction.
   1304 ///
   1305 /// \param __s3
   1306 ///    A 16-bit integer value used to initialize bits [63:48] of the result.
   1307 /// \param __s2
   1308 ///    A 16-bit integer value used to initialize bits [47:32] of the result.
   1309 /// \param __s1
   1310 ///    A 16-bit integer value used to initialize bits [31:16] of the result.
   1311 /// \param __s0
   1312 ///    A 16-bit integer value used to initialize bits [15:0] of the result.
   1313 /// \returns An initialized 64-bit integer vector.
   1314 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1315 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
   1316 {
   1317     return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
   1318 }
   1319 
   1320 /// \brief Constructs a 64-bit integer vector initialized with the specified
   1321 ///    8-bit integer values.
   1322 ///
   1323 /// \headerfile <x86intrin.h>
   1324 ///
   1325 /// This intrinsic is a utility function and does not correspond to a specific
   1326 ///    instruction.
   1327 ///
   1328 /// \param __b7
   1329 ///    An 8-bit integer value used to initialize bits [63:56] of the result.
   1330 /// \param __b6
   1331 ///    An 8-bit integer value used to initialize bits [55:48] of the result.
   1332 /// \param __b5
   1333 ///    An 8-bit integer value used to initialize bits [47:40] of the result.
   1334 /// \param __b4
   1335 ///    An 8-bit integer value used to initialize bits [39:32] of the result.
   1336 /// \param __b3
   1337 ///    An 8-bit integer value used to initialize bits [31:24] of the result.
   1338 /// \param __b2
   1339 ///    An 8-bit integer value used to initialize bits [23:16] of the result.
   1340 /// \param __b1
   1341 ///    An 8-bit integer value used to initialize bits [15:8] of the result.
   1342 /// \param __b0
   1343 ///    An 8-bit integer value used to initialize bits [7:0] of the result.
   1344 /// \returns An initialized 64-bit integer vector.
   1345 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1346 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
   1347             char __b1, char __b0)
   1348 {
   1349     return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
   1350                                                __b4, __b5, __b6, __b7);
   1351 }
   1352 
   1353 /// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the
   1354 ///    32-bit integer vector elements set to the specified 32-bit integer
   1355 ///    value.
   1356 ///
   1357 /// \headerfile <x86intrin.h>
   1358 ///
   1359 /// This intrinsic corresponds to the \c VPSHUFD / PSHUFD instruction.
   1360 ///
   1361 /// \param __i
   1362 ///    A 32-bit integer value used to initialize each vector element of the
   1363 ///    result.
   1364 /// \returns An initialized 64-bit integer vector of [2 x i32].
   1365 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1366 _mm_set1_pi32(int __i)
   1367 {
   1368     return _mm_set_pi32(__i, __i);
   1369 }
   1370 
   1371 /// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the
   1372 ///    16-bit integer vector elements set to the specified 16-bit integer
   1373 ///    value.
   1374 ///
   1375 /// \headerfile <x86intrin.h>
   1376 ///
   1377 /// This intrinsic corresponds to the \c VPSHUFLW / PSHUFLW instruction.
   1378 ///
   1379 /// \param __w
   1380 ///    A 16-bit integer value used to initialize each vector element of the
   1381 ///    result.
   1382 /// \returns An initialized 64-bit integer vector of [4 x i16].
   1383 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1384 _mm_set1_pi16(short __w)
   1385 {
   1386     return _mm_set_pi16(__w, __w, __w, __w);
   1387 }
   1388 
   1389 /// \brief Broadcasts one 8-bit integer into all eight elements of a 64-bit
   1390 ///    integer vector of [8 x i8].
   1391 ///
   1392 /// \headerfile <x86intrin.h>
   1393 ///
   1394 /// This intrinsic corresponds to the \c VPUNPCKLBW + VPSHUFLW / \c PUNPCKLBW +
   1395 ///    PSHUFLW instruction.
   1396 ///
   1397 /// \param __b
   1398 ///    The 8-bit integer value copied into each element of the result.
   1399 /// \returns A 64-bit integer vector of [8 x i8] filled with \a __b.
   1400 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1401 _mm_set1_pi8(char __b)
   1402 {
   1403     __m64 __splat = _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
   1404     return __splat;
   1405 }
   1406 
   1407 /// \brief Constructs a 64-bit integer vector from two 32-bit integer values
   1408 ///    supplied in reverse order, lowest element first.
   1409 ///
   1410 /// \headerfile <x86intrin.h>
   1411 ///
   1412 /// This intrinsic is a utility function and does not correspond to a specific
   1413 ///    instruction.
   1414 ///
   1415 /// \param __i0
   1416 ///    The 32-bit integer value stored in the lower 32 bits of the result.
   1417 /// \param __i1
   1418 ///    The 32-bit integer value stored in the upper 32 bits of the result.
   1419 /// \returns The initialized 64-bit integer vector.
   1420 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1421 _mm_setr_pi32(int __i0, int __i1)
   1422 {
   1423     /* Swap the operands before delegating to _mm_set_pi32. */
   1424     __m64 __vec = _mm_set_pi32(__i1, __i0);
   1425     return __vec;
   1426 }
   1427 
   1428 /// \brief Builds a 64-bit integer vector from 16-bit values in reverse order.
   1429 ///
   1430 /// \headerfile <x86intrin.h>
   1431 ///
   1432 /// This intrinsic is a utility function and does not correspond to a specific
   1433 ///    instruction.
   1434 ///
   1435 /// \param __w0
   1436 ///    The 16-bit integer value stored in bits [15:0] of the result.
   1437 /// \param __w1
   1438 ///    The 16-bit integer value stored in bits [31:16] of the result.
   1439 /// \param __w2
   1440 ///    The 16-bit integer value stored in bits [47:32] of the result.
   1441 /// \param __w3
   1442 ///    The 16-bit integer value stored in bits [63:48] of the result.
   1443 /// \returns The initialized 64-bit integer vector.
   1444 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1445 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
   1446 {
   1447     __m64 __vec = _mm_set_pi16(__w3, __w2, __w1, __w0);
   1448     return __vec;
   1449 }
   1450 
   1451 /// \brief Builds a 64-bit integer vector from 8-bit values in reverse order.
   1452 ///
   1453 /// \headerfile <x86intrin.h>
   1454 ///
   1455 /// This intrinsic is a utility function and does not correspond to a specific
   1456 ///    instruction.
   1457 ///
   1458 /// \param __b0
   1459 ///    The 8-bit integer value stored in bits [7:0] of the result.
   1460 /// \param __b1
   1461 ///    The 8-bit integer value stored in bits [15:8] of the result.
   1462 /// \param __b2
   1463 ///    The 8-bit integer value stored in bits [23:16] of the result.
   1464 /// \param __b3
   1465 ///    The 8-bit integer value stored in bits [31:24] of the result.
   1466 /// \param __b4
   1467 ///    The 8-bit integer value stored in bits [39:32] of the result.
   1468 /// \param __b5
   1469 ///    The 8-bit integer value stored in bits [47:40] of the result.
   1470 /// \param __b6
   1471 ///    The 8-bit integer value stored in bits [55:48] of the result.
   1472 /// \param __b7
   1473 ///    The 8-bit integer value stored in bits [63:56] of the result.
   1474 /// \returns The initialized 64-bit integer vector.
   1475 static __inline__ __m64 __DEFAULT_FN_ATTRS
   1476 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3,
   1477              char __b4, char __b5, char __b6, char __b7)
   1478 {
   1479     __m64 __vec = _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
   1480     return __vec;
   1481 }
   1482 
   1483 #undef __DEFAULT_FN_ATTRS /* helper macro is private to this header */
   1484 
   1485 /* Compatibility aliases: each _m_* macro expands to an _mm_* intrinsic name. */
   1486 #define _m_empty _mm_empty
   1487 #define _m_from_int _mm_cvtsi32_si64
   1488 #define _m_from_int64 _mm_cvtsi64_m64
   1489 #define _m_to_int _mm_cvtsi64_si32
   1490 #define _m_to_int64 _mm_cvtm64_si64
   1491 #define _m_packsswb _mm_packs_pi16
   1492 #define _m_packssdw _mm_packs_pi32
   1493 #define _m_packuswb _mm_packs_pu16
   1494 #define _m_punpckhbw _mm_unpackhi_pi8
   1495 #define _m_punpckhwd _mm_unpackhi_pi16
   1496 #define _m_punpckhdq _mm_unpackhi_pi32
   1497 #define _m_punpcklbw _mm_unpacklo_pi8
   1498 #define _m_punpcklwd _mm_unpacklo_pi16
   1499 #define _m_punpckldq _mm_unpacklo_pi32
   1500 #define _m_paddb _mm_add_pi8
   1501 #define _m_paddw _mm_add_pi16
   1502 #define _m_paddd _mm_add_pi32
   1503 #define _m_paddsb _mm_adds_pi8
   1504 #define _m_paddsw _mm_adds_pi16
   1505 #define _m_paddusb _mm_adds_pu8
   1506 #define _m_paddusw _mm_adds_pu16
   1507 #define _m_psubb _mm_sub_pi8
   1508 #define _m_psubw _mm_sub_pi16
   1509 #define _m_psubd _mm_sub_pi32
   1510 #define _m_psubsb _mm_subs_pi8
   1511 #define _m_psubsw _mm_subs_pi16
   1512 #define _m_psubusb _mm_subs_pu8
   1513 #define _m_psubusw _mm_subs_pu16
   1514 #define _m_pmaddwd _mm_madd_pi16
   1515 #define _m_pmulhw _mm_mulhi_pi16
   1516 #define _m_pmullw _mm_mullo_pi16
   1517 #define _m_psllw _mm_sll_pi16
   1518 #define _m_psllwi _mm_slli_pi16
   1519 #define _m_pslld _mm_sll_pi32
   1520 #define _m_pslldi _mm_slli_pi32
   1521 #define _m_psllq _mm_sll_si64
   1522 #define _m_psllqi _mm_slli_si64
   1523 #define _m_psraw _mm_sra_pi16
   1524 #define _m_psrawi _mm_srai_pi16
   1525 #define _m_psrad _mm_sra_pi32
   1526 #define _m_psradi _mm_srai_pi32
   1527 #define _m_psrlw _mm_srl_pi16
   1528 #define _m_psrlwi _mm_srli_pi16
   1529 #define _m_psrld _mm_srl_pi32
   1530 #define _m_psrldi _mm_srli_pi32
   1531 #define _m_psrlq _mm_srl_si64
   1532 #define _m_psrlqi _mm_srli_si64
   1533 #define _m_pand _mm_and_si64
   1534 #define _m_pandn _mm_andnot_si64
   1535 #define _m_por _mm_or_si64
   1536 #define _m_pxor _mm_xor_si64
   1537 #define _m_pcmpeqb _mm_cmpeq_pi8
   1538 #define _m_pcmpeqw _mm_cmpeq_pi16
   1539 #define _m_pcmpeqd _mm_cmpeq_pi32
   1540 #define _m_pcmpgtb _mm_cmpgt_pi8
   1541 #define _m_pcmpgtw _mm_cmpgt_pi16
   1542 #define _m_pcmpgtd _mm_cmpgt_pi32
   1543 
   1544 #endif /* __MMINTRIN_H */
   1545 
   1546