/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __MMINTRIN_H
#define __MMINTRIN_H

/* The 8-byte vector type that the intrinsics in this file take and return. */
typedef long long __m64 __attribute__((__vector_size__(8)));

/* 8-byte vector types with long long, int, short and char elements. The
 * intrinsics cast __m64 operands to one of these before calling a builtin. */
typedef long long __v1di __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))

/// \brief Clears the MMX state by setting the state of the x87 stack registers
///    to empty.
39 /// 40 /// \headerfile <x86intrin.h> 41 /// 42 /// This intrinsic corresponds to the \c EMMS instruction. 43 /// 44 static __inline__ void __DEFAULT_FN_ATTRS 45 _mm_empty(void) 46 { 47 __builtin_ia32_emms(); 48 } 49 50 /// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the 51 /// value of the 32-bit integer parameter and setting the upper 32 bits to 0. 52 /// 53 /// \headerfile <x86intrin.h> 54 /// 55 /// This intrinsic corresponds to the \c VMOVD / MOVD instruction. 56 /// 57 /// \param __i 58 /// A 32-bit integer value. 59 /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the 60 /// parameter. The upper 32 bits are set to 0. 61 static __inline__ __m64 __DEFAULT_FN_ATTRS 62 _mm_cvtsi32_si64(int __i) 63 { 64 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 65 } 66 67 /// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit 68 /// signed integer. 69 /// 70 /// \headerfile <x86intrin.h> 71 /// 72 /// This intrinsic corresponds to the \c VMOVD / MOVD instruction. 73 /// 74 /// \param __m 75 /// A 64-bit integer vector. 76 /// \returns A 32-bit signed integer value containing the lower 32 bits of the 77 /// parameter. 78 static __inline__ int __DEFAULT_FN_ATTRS 79 _mm_cvtsi64_si32(__m64 __m) 80 { 81 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 82 } 83 84 /// \brief Casts a 64-bit signed integer value into a 64-bit integer vector. 85 /// 86 /// \headerfile <x86intrin.h> 87 /// 88 /// This intrinsic corresponds to the \c VMOVQ / MOVD instruction. 89 /// 90 /// \param __i 91 /// A 64-bit signed integer. 92 /// \returns A 64-bit integer vector containing the same bitwise pattern as the 93 /// parameter. 94 static __inline__ __m64 __DEFAULT_FN_ATTRS 95 _mm_cvtsi64_m64(long long __i) 96 { 97 return (__m64)__i; 98 } 99 100 /// \brief Casts a 64-bit integer vector into a 64-bit signed integer value. 
101 /// 102 /// \headerfile <x86intrin.h> 103 /// 104 /// This intrinsic corresponds to the \c VMOVQ / MOVD instruction. 105 /// 106 /// \param __m 107 /// A 64-bit integer vector. 108 /// \returns A 64-bit signed integer containing the same bitwise pattern as the 109 /// parameter. 110 static __inline__ long long __DEFAULT_FN_ATTRS 111 _mm_cvtm64_si64(__m64 __m) 112 { 113 return (long long)__m; 114 } 115 116 /// \brief Converts 16-bit signed integers from both 64-bit integer vector 117 /// parameters of [4 x i16] into 8-bit signed integer values, and constructs 118 /// a 64-bit integer vector of [8 x i8] as the result. Positive values 119 /// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 120 /// are saturated to 0x80. 121 /// 122 /// \headerfile <x86intrin.h> 123 /// 124 /// This intrinsic corresponds to the \c PACKSSWB instruction. 125 /// 126 /// \param __m1 127 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 128 /// 16-bit signed integer and is converted to an 8-bit signed integer with 129 /// saturation. Positive values greater than 0x7F are saturated to 0x7F. 130 /// Negative values less than 0x80 are saturated to 0x80. The converted 131 /// [4 x i8] values are written to the lower 32 bits of the result. 132 /// \param __m2 133 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 134 /// 16-bit signed integer and is converted to an 8-bit signed integer with 135 /// saturation. Positive values greater than 0x7F are saturated to 0x7F. 136 /// Negative values less than 0x80 are saturated to 0x80. The converted 137 /// [4 x i8] values are written to the upper 32 bits of the result. 138 /// \returns A 64-bit integer vector of [8 x i8] containing the converted 139 /// values. 
140 static __inline__ __m64 __DEFAULT_FN_ATTRS 141 _mm_packs_pi16(__m64 __m1, __m64 __m2) 142 { 143 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 144 } 145 146 /// \brief Converts 32-bit signed integers from both 64-bit integer vector 147 /// parameters of [2 x i32] into 16-bit signed integer values, and constructs 148 /// a 64-bit integer vector of [4 x i16] as the result. Positive values 149 /// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than 150 /// 0x8000 are saturated to 0x8000. 151 /// 152 /// \headerfile <x86intrin.h> 153 /// 154 /// This intrinsic corresponds to the \c PACKSSDW instruction. 155 /// 156 /// \param __m1 157 /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 158 /// 32-bit signed integer and is converted to a 16-bit signed integer with 159 /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 160 /// Negative values less than 0x8000 are saturated to 0x8000. The converted 161 /// [2 x i16] values are written to the lower 32 bits of the result. 162 /// \param __m2 163 /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 164 /// 32-bit signed integer and is converted to a 16-bit signed integer with 165 /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 166 /// Negative values less than 0x8000 are saturated to 0x8000. The converted 167 /// [2 x i16] values are written to the upper 32 bits of the result. 168 /// \returns A 64-bit integer vector of [4 x i16] containing the converted 169 /// values. 170 static __inline__ __m64 __DEFAULT_FN_ATTRS 171 _mm_packs_pi32(__m64 __m1, __m64 __m2) 172 { 173 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 174 } 175 176 /// \brief Converts 16-bit signed integers from both 64-bit integer vector 177 /// parameters of [4 x i16] into 8-bit unsigned integer values, and 178 /// constructs a 64-bit integer vector of [8 x i8] as the result. 
Values 179 /// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated 180 /// to 0. 181 /// 182 /// \headerfile <x86intrin.h> 183 /// 184 /// This intrinsic corresponds to the \c PACKUSWB instruction. 185 /// 186 /// \param __m1 187 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 188 /// 16-bit signed integer and is converted to an 8-bit unsigned integer with 189 /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 190 /// than 0 are saturated to 0. The converted [4 x i8] values are written to 191 /// the lower 32 bits of the result. 192 /// \param __m2 193 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 194 /// 16-bit signed integer and is converted to an 8-bit unsigned integer with 195 /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 196 /// than 0 are saturated to 0. The converted [4 x i8] values are written to 197 /// the upper 32 bits of the result. 198 /// \returns A 64-bit integer vector of [8 x i8] containing the converted 199 /// values. 200 static __inline__ __m64 __DEFAULT_FN_ATTRS 201 _mm_packs_pu16(__m64 __m1, __m64 __m2) 202 { 203 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 204 } 205 206 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] 207 /// and interleaves them into a 64-bit integer vector of [8 x i8]. 208 /// 209 /// \headerfile <x86intrin.h> 210 /// 211 /// This intrinsic corresponds to the \c PUNPCKHBW instruction. 212 /// 213 /// \param __m1 214 /// A 64-bit integer vector of [8 x i8]. 215 /// Bits [39:32] are written to bits [7:0] of the result. 216 /// Bits [47:40] are written to bits [23:16] of the result. 217 /// Bits [55:48] are written to bits [39:32] of the result. 218 /// Bits [63:56] are written to bits [55:48] of the result. 219 /// \param __m2 220 /// A 64-bit integer vector of [8 x i8]. 221 /// Bits [39:32] are written to bits [15:8] of the result. 
222 /// Bits [47:40] are written to bits [31:24] of the result. 223 /// Bits [55:48] are written to bits [47:40] of the result. 224 /// Bits [63:56] are written to bits [63:56] of the result. 225 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 226 /// values. 227 static __inline__ __m64 __DEFAULT_FN_ATTRS 228 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 229 { 230 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 231 } 232 233 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of 234 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 235 /// 236 /// \headerfile <x86intrin.h> 237 /// 238 /// This intrinsic corresponds to the \c PUNPCKHWD instruction. 239 /// 240 /// \param __m1 241 /// A 64-bit integer vector of [4 x i16]. 242 /// Bits [47:32] are written to bits [15:0] of the result. 243 /// Bits [63:48] are written to bits [47:32] of the result. 244 /// \param __m2 245 /// A 64-bit integer vector of [4 x i16]. 246 /// Bits [47:32] are written to bits [31:16] of the result. 247 /// Bits [63:48] are written to bits [63:48] of the result. 248 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 249 /// values. 250 static __inline__ __m64 __DEFAULT_FN_ATTRS 251 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 252 { 253 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 254 } 255 256 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of 257 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 258 /// 259 /// \headerfile <x86intrin.h> 260 /// 261 /// This intrinsic corresponds to the \c PUNPCKHDQ instruction. 262 /// 263 /// \param __m1 264 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 265 /// the lower 32 bits of the result. 266 /// \param __m2 267 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 268 /// the upper 32 bits of the result. 
269 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 270 /// values. 271 static __inline__ __m64 __DEFAULT_FN_ATTRS 272 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 273 { 274 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 275 } 276 277 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] 278 /// and interleaves them into a 64-bit integer vector of [8 x i8]. 279 /// 280 /// \headerfile <x86intrin.h> 281 /// 282 /// This intrinsic corresponds to the \c PUNPCKLBW instruction. 283 /// 284 /// \param __m1 285 /// A 64-bit integer vector of [8 x i8]. 286 /// Bits [7:0] are written to bits [7:0] of the result. 287 /// Bits [15:8] are written to bits [23:16] of the result. 288 /// Bits [23:16] are written to bits [39:32] of the result. 289 /// Bits [31:24] are written to bits [55:48] of the result. 290 /// \param __m2 291 /// A 64-bit integer vector of [8 x i8]. 292 /// Bits [7:0] are written to bits [15:8] of the result. 293 /// Bits [15:8] are written to bits [31:24] of the result. 294 /// Bits [23:16] are written to bits [47:40] of the result. 295 /// Bits [31:24] are written to bits [63:56] of the result. 296 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 297 /// values. 298 static __inline__ __m64 __DEFAULT_FN_ATTRS 299 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 300 { 301 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 302 } 303 304 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of 305 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 306 /// 307 /// \headerfile <x86intrin.h> 308 /// 309 /// This intrinsic corresponds to the \c PUNPCKLWD instruction. 310 /// 311 /// \param __m1 312 /// A 64-bit integer vector of [4 x i16]. 313 /// Bits [15:0] are written to bits [15:0] of the result. 314 /// Bits [31:16] are written to bits [47:32] of the result. 
315 /// \param __m2 316 /// A 64-bit integer vector of [4 x i16]. 317 /// Bits [15:0] are written to bits [31:16] of the result. 318 /// Bits [31:16] are written to bits [63:48] of the result. 319 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 320 /// values. 321 static __inline__ __m64 __DEFAULT_FN_ATTRS 322 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 323 { 324 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 325 } 326 327 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of 328 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 329 /// 330 /// \headerfile <x86intrin.h> 331 /// 332 /// This intrinsic corresponds to the \c PUNPCKLDQ instruction. 333 /// 334 /// \param __m1 335 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 336 /// the lower 32 bits of the result. 337 /// \param __m2 338 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 339 /// the upper 32 bits of the result. 340 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 341 /// values. 342 static __inline__ __m64 __DEFAULT_FN_ATTRS 343 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 344 { 345 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 346 } 347 348 /// \brief Adds each 8-bit integer element of the first 64-bit integer vector 349 /// of [8 x i8] to the corresponding 8-bit integer element of the second 350 /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are 351 /// packed into a 64-bit integer vector of [8 x i8]. 352 /// 353 /// \headerfile <x86intrin.h> 354 /// 355 /// This intrinsic corresponds to the \c PADDB instruction. 356 /// 357 /// \param __m1 358 /// A 64-bit integer vector of [8 x i8]. 359 /// \param __m2 360 /// A 64-bit integer vector of [8 x i8]. 361 /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both 362 /// parameters. 
363 static __inline__ __m64 __DEFAULT_FN_ATTRS 364 _mm_add_pi8(__m64 __m1, __m64 __m2) 365 { 366 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 367 } 368 369 /// \brief Adds each 16-bit integer element of the first 64-bit integer vector 370 /// of [4 x i16] to the corresponding 16-bit integer element of the second 371 /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are 372 /// packed into a 64-bit integer vector of [4 x i16]. 373 /// 374 /// \headerfile <x86intrin.h> 375 /// 376 /// This intrinsic corresponds to the \c PADDW instruction. 377 /// 378 /// \param __m1 379 /// A 64-bit integer vector of [4 x i16]. 380 /// \param __m2 381 /// A 64-bit integer vector of [4 x i16]. 382 /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both 383 /// parameters. 384 static __inline__ __m64 __DEFAULT_FN_ATTRS 385 _mm_add_pi16(__m64 __m1, __m64 __m2) 386 { 387 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 388 } 389 390 /// \brief Adds each 32-bit integer element of the first 64-bit integer vector 391 /// of [2 x i32] to the corresponding 32-bit integer element of the second 392 /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are 393 /// packed into a 64-bit integer vector of [2 x i32]. 394 /// 395 /// \headerfile <x86intrin.h> 396 /// 397 /// This intrinsic corresponds to the \c PADDD instruction. 398 /// 399 /// \param __m1 400 /// A 64-bit integer vector of [2 x i32]. 401 /// \param __m2 402 /// A 64-bit integer vector of [2 x i32]. 403 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both 404 /// parameters. 
405 static __inline__ __m64 __DEFAULT_FN_ATTRS 406 _mm_add_pi32(__m64 __m1, __m64 __m2) 407 { 408 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 409 } 410 411 /// \brief Adds each 8-bit signed integer element of the first 64-bit integer 412 /// vector of [8 x i8] to the corresponding 8-bit signed integer element of 413 /// the second 64-bit integer vector of [8 x i8]. Positive sums greater than 414 /// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to 415 /// 0x80. The results are packed into a 64-bit integer vector of [8 x i8]. 416 /// 417 /// \headerfile <x86intrin.h> 418 /// 419 /// This intrinsic corresponds to the \c PADDSB instruction. 420 /// 421 /// \param __m1 422 /// A 64-bit integer vector of [8 x i8]. 423 /// \param __m2 424 /// A 64-bit integer vector of [8 x i8]. 425 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums 426 /// of both parameters. 427 static __inline__ __m64 __DEFAULT_FN_ATTRS 428 _mm_adds_pi8(__m64 __m1, __m64 __m2) 429 { 430 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 431 } 432 433 /// \brief Adds each 16-bit signed integer element of the first 64-bit integer 434 /// vector of [4 x i16] to the corresponding 16-bit signed integer element of 435 /// the second 64-bit integer vector of [4 x i16]. Positive sums greater than 436 /// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are 437 /// saturated to 0x8000. The results are packed into a 64-bit integer vector 438 /// of [4 x i16]. 439 /// 440 /// \headerfile <x86intrin.h> 441 /// 442 /// This intrinsic corresponds to the \c PADDSW instruction. 443 /// 444 /// \param __m1 445 /// A 64-bit integer vector of [4 x i16]. 446 /// \param __m2 447 /// A 64-bit integer vector of [4 x i16]. 448 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums 449 /// of both parameters. 
450 static __inline__ __m64 __DEFAULT_FN_ATTRS 451 _mm_adds_pi16(__m64 __m1, __m64 __m2) 452 { 453 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 454 } 455 456 /// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer 457 /// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of 458 /// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are 459 /// saturated to 0xFF. The results are packed into a 64-bit integer vector of 460 /// [8 x i8]. 461 /// 462 /// \headerfile <x86intrin.h> 463 /// 464 /// This intrinsic corresponds to the \c PADDUSB instruction. 465 /// 466 /// \param __m1 467 /// A 64-bit integer vector of [8 x i8]. 468 /// \param __m2 469 /// A 64-bit integer vector of [8 x i8]. 470 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 471 /// unsigned sums of both parameters. 472 static __inline__ __m64 __DEFAULT_FN_ATTRS 473 _mm_adds_pu8(__m64 __m1, __m64 __m2) 474 { 475 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 476 } 477 478 /// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer 479 /// vector of [4 x i16] to the corresponding 16-bit unsigned integer element 480 /// of the second 64-bit integer vector of [4 x i16]. Sums greater than 481 /// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit 482 /// integer vector of [4 x i16]. 483 /// 484 /// \headerfile <x86intrin.h> 485 /// 486 /// This intrinsic corresponds to the \c PADDUSW instruction. 487 /// 488 /// \param __m1 489 /// A 64-bit integer vector of [4 x i16]. 490 /// \param __m2 491 /// A 64-bit integer vector of [4 x i16]. 492 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 493 /// unsigned sums of both parameters. 
494 static __inline__ __m64 __DEFAULT_FN_ATTRS 495 _mm_adds_pu16(__m64 __m1, __m64 __m2) 496 { 497 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 498 } 499 500 /// \brief Subtracts each 8-bit integer element of the second 64-bit integer 501 /// vector of [8 x i8] from the corresponding 8-bit integer element of the 502 /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results 503 /// are packed into a 64-bit integer vector of [8 x i8]. 504 /// 505 /// \headerfile <x86intrin.h> 506 /// 507 /// This intrinsic corresponds to the \c PSUBB instruction. 508 /// 509 /// \param __m1 510 /// A 64-bit integer vector of [8 x i8] containing the minuends. 511 /// \param __m2 512 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 513 /// \returns A 64-bit integer vector of [8 x i8] containing the differences of 514 /// both parameters. 515 static __inline__ __m64 __DEFAULT_FN_ATTRS 516 _mm_sub_pi8(__m64 __m1, __m64 __m2) 517 { 518 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 519 } 520 521 /// \brief Subtracts each 16-bit integer element of the second 64-bit integer 522 /// vector of [4 x i16] from the corresponding 16-bit integer element of the 523 /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the 524 /// results are packed into a 64-bit integer vector of [4 x i16]. 525 /// 526 /// \headerfile <x86intrin.h> 527 /// 528 /// This intrinsic corresponds to the \c PSUBW instruction. 529 /// 530 /// \param __m1 531 /// A 64-bit integer vector of [4 x i16] containing the minuends. 532 /// \param __m2 533 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 534 /// \returns A 64-bit integer vector of [4 x i16] containing the differences of 535 /// both parameters. 
536 static __inline__ __m64 __DEFAULT_FN_ATTRS 537 _mm_sub_pi16(__m64 __m1, __m64 __m2) 538 { 539 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 540 } 541 542 /// \brief Subtracts each 32-bit integer element of the second 64-bit integer 543 /// vector of [2 x i32] from the corresponding 32-bit integer element of the 544 /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the 545 /// results are packed into a 64-bit integer vector of [2 x i32]. 546 /// 547 /// \headerfile <x86intrin.h> 548 /// 549 /// This intrinsic corresponds to the \c PSUBD instruction. 550 /// 551 /// \param __m1 552 /// A 64-bit integer vector of [2 x i32] containing the minuends. 553 /// \param __m2 554 /// A 64-bit integer vector of [2 x i32] containing the subtrahends. 555 /// \returns A 64-bit integer vector of [2 x i32] containing the differences of 556 /// both parameters. 557 static __inline__ __m64 __DEFAULT_FN_ATTRS 558 _mm_sub_pi32(__m64 __m1, __m64 __m2) 559 { 560 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 561 } 562 563 /// \brief Subtracts each 8-bit signed integer element of the second 64-bit 564 /// integer vector of [8 x i8] from the corresponding 8-bit signed integer 565 /// element of the first 64-bit integer vector of [8 x i8]. Positive results 566 /// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 567 /// are saturated to 0x80. The results are packed into a 64-bit integer 568 /// vector of [8 x i8]. 569 /// 570 /// \headerfile <x86intrin.h> 571 /// 572 /// This intrinsic corresponds to the \c PSUBSB instruction. 573 /// 574 /// \param __m1 575 /// A 64-bit integer vector of [8 x i8] containing the minuends. 576 /// \param __m2 577 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 578 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 579 /// differences of both parameters. 
580 static __inline__ __m64 __DEFAULT_FN_ATTRS 581 _mm_subs_pi8(__m64 __m1, __m64 __m2) 582 { 583 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 584 } 585 586 /// \brief Subtracts each 16-bit signed integer element of the second 64-bit 587 /// integer vector of [4 x i16] from the corresponding 16-bit signed integer 588 /// element of the first 64-bit integer vector of [4 x i16]. Positive results 589 /// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than 590 /// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit 591 /// integer vector of [4 x i16]. 592 /// 593 /// \headerfile <x86intrin.h> 594 /// 595 /// This intrinsic corresponds to the \c PSUBSW instruction. 596 /// 597 /// \param __m1 598 /// A 64-bit integer vector of [4 x i16] containing the minuends. 599 /// \param __m2 600 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 601 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 602 /// differences of both parameters. 603 static __inline__ __m64 __DEFAULT_FN_ATTRS 604 _mm_subs_pi16(__m64 __m1, __m64 __m2) 605 { 606 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 607 } 608 609 /// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit 610 /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer 611 /// element of the first 64-bit integer vector of [8 x i8]. If an element of 612 /// the first vector is less than the corresponding element of the second 613 /// vector, the result is saturated to 0. The results are packed into a 614 /// 64-bit integer vector of [8 x i8]. 615 /// 616 /// \headerfile <x86intrin.h> 617 /// 618 /// This intrinsic corresponds to the \c PSUBUSB instruction. 619 /// 620 /// \param __m1 621 /// A 64-bit integer vector of [8 x i8] containing the minuends. 622 /// \param __m2 623 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 
624 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 625 /// differences of both parameters. 626 static __inline__ __m64 __DEFAULT_FN_ATTRS 627 _mm_subs_pu8(__m64 __m1, __m64 __m2) 628 { 629 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 630 } 631 632 /// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit 633 /// integer vector of [4 x i16] from the corresponding 16-bit unsigned 634 /// integer element of the first 64-bit integer vector of [4 x i16]. If an 635 /// element of the first vector is less than the corresponding element of the 636 /// second vector, the result is saturated to 0. The results are packed into 637 /// a 64-bit integer vector of [4 x i16]. 638 /// 639 /// \headerfile <x86intrin.h> 640 /// 641 /// This intrinsic corresponds to the \c PSUBUSW instruction. 642 /// 643 /// \param __m1 644 /// A 64-bit integer vector of [4 x i16] containing the minuends. 645 /// \param __m2 646 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 647 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 648 /// differences of both parameters. 649 static __inline__ __m64 __DEFAULT_FN_ATTRS 650 _mm_subs_pu16(__m64 __m1, __m64 __m2) 651 { 652 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 653 } 654 655 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit 656 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 657 /// element of the second 64-bit integer vector of [4 x i16] and get four 658 /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. 659 /// The lower 32 bits of these two sums are packed into a 64-bit integer 660 /// vector of [2 x i32]. For example, bits [15:0] of both parameters are 661 /// multiplied, bits [31:16] of both parameters are multiplied, and the sum 662 /// of both results is written to bits [31:0] of the result. 
663 /// 664 /// \headerfile <x86intrin.h> 665 /// 666 /// This intrinsic corresponds to the \c PMADDWD instruction. 667 /// 668 /// \param __m1 669 /// A 64-bit integer vector of [4 x i16]. 670 /// \param __m2 671 /// A 64-bit integer vector of [4 x i16]. 672 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of 673 /// products of both parameters. 674 static __inline__ __m64 __DEFAULT_FN_ATTRS 675 _mm_madd_pi16(__m64 __m1, __m64 __m2) 676 { 677 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 678 } 679 680 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit 681 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 682 /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper 683 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 684 /// 685 /// \headerfile <x86intrin.h> 686 /// 687 /// This intrinsic corresponds to the \c PMULHW instruction. 688 /// 689 /// \param __m1 690 /// A 64-bit integer vector of [4 x i16]. 691 /// \param __m2 692 /// A 64-bit integer vector of [4 x i16]. 693 /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits 694 /// of the products of both parameters. 695 static __inline__ __m64 __DEFAULT_FN_ATTRS 696 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) 697 { 698 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 699 } 700 701 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit 702 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 703 /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower 704 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 705 /// 706 /// \headerfile <x86intrin.h> 707 /// 708 /// This intrinsic corresponds to the \c PMULLW instruction. 709 /// 710 /// \param __m1 711 /// A 64-bit integer vector of [4 x i16]. 
712 /// \param __m2 713 /// A 64-bit integer vector of [4 x i16]. 714 /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits 715 /// of the products of both parameters. 716 static __inline__ __m64 __DEFAULT_FN_ATTRS 717 _mm_mullo_pi16(__m64 __m1, __m64 __m2) 718 { 719 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 720 } 721 722 /// \brief Left-shifts each 16-bit signed integer element of the first 723 /// parameter, which is a 64-bit integer vector of [4 x i16], by the number 724 /// of bits specified by the second parameter, which is a 64-bit integer. The 725 /// lower 16 bits of the results are packed into a 64-bit integer vector of 726 /// [4 x i16]. 727 /// 728 /// \headerfile <x86intrin.h> 729 /// 730 /// This intrinsic corresponds to the \c PSLLW instruction. 731 /// 732 /// \param __m 733 /// A 64-bit integer vector of [4 x i16]. 734 /// \param __count 735 /// A 64-bit integer vector interpreted as a single 64-bit integer. 736 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 737 /// values. If __count is greater or equal to 16, the result is set to all 0. 738 static __inline__ __m64 __DEFAULT_FN_ATTRS 739 _mm_sll_pi16(__m64 __m, __m64 __count) 740 { 741 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 742 } 743 744 /// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer 745 /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. 746 /// The lower 16 bits of the results are packed into a 64-bit integer vector 747 /// of [4 x i16]. 748 /// 749 /// \headerfile <x86intrin.h> 750 /// 751 /// This intrinsic corresponds to the \c PSLLW instruction. 752 /// 753 /// \param __m 754 /// A 64-bit integer vector of [4 x i16]. 755 /// \param __count 756 /// A 32-bit integer value. 757 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 758 /// values. If __count is greater or equal to 16, the result is set to all 0. 
759 static __inline__ __m64 __DEFAULT_FN_ATTRS 760 _mm_slli_pi16(__m64 __m, int __count) 761 { 762 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 763 } 764 765 /// \brief Left-shifts each 32-bit signed integer element of the first 766 /// parameter, which is a 64-bit integer vector of [2 x i32], by the number 767 /// of bits specified by the second parameter, which is a 64-bit integer. The 768 /// lower 32 bits of the results are packed into a 64-bit integer vector of 769 /// [2 x i32]. 770 /// 771 /// \headerfile <x86intrin.h> 772 /// 773 /// This intrinsic corresponds to the \c PSLLD instruction. 774 /// 775 /// \param __m 776 /// A 64-bit integer vector of [2 x i32]. 777 /// \param __count 778 /// A 64-bit integer vector interpreted as a single 64-bit integer. 779 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 780 /// values. If __count is greater or equal to 32, the result is set to all 0. 781 static __inline__ __m64 __DEFAULT_FN_ATTRS 782 _mm_sll_pi32(__m64 __m, __m64 __count) 783 { 784 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 785 } 786 787 /// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer 788 /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. 789 /// The lower 32 bits of the results are packed into a 64-bit integer vector 790 /// of [2 x i32]. 791 /// 792 /// \headerfile <x86intrin.h> 793 /// 794 /// This intrinsic corresponds to the \c PSLLD instruction. 795 /// 796 /// \param __m 797 /// A 64-bit integer vector of [2 x i32]. 798 /// \param __count 799 /// A 32-bit integer value. 800 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 801 /// values. If __count is greater or equal to 32, the result is set to all 0. 
802 static __inline__ __m64 __DEFAULT_FN_ATTRS 803 _mm_slli_pi32(__m64 __m, int __count) 804 { 805 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 806 } 807 808 /// \brief Left-shifts the first 64-bit integer parameter by the number of bits 809 /// specified by the second 64-bit integer parameter. The lower 64 bits of 810 /// result are returned. 811 /// 812 /// \headerfile <x86intrin.h> 813 /// 814 /// This intrinsic corresponds to the \c PSLLQ instruction. 815 /// 816 /// \param __m 817 /// A 64-bit integer vector interpreted as a single 64-bit integer. 818 /// \param __count 819 /// A 64-bit integer vector interpreted as a single 64-bit integer. 820 /// \returns A 64-bit integer vector containing the left-shifted value. If 821 /// __count is greater or equal to 64, the result is set to 0. 822 static __inline__ __m64 __DEFAULT_FN_ATTRS 823 _mm_sll_si64(__m64 __m, __m64 __count) 824 { 825 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); 826 } 827 828 /// \brief Left-shifts the first parameter, which is a 64-bit integer, by the 829 /// number of bits specified by the second parameter, which is a 32-bit 830 /// integer. The lower 64 bits of result are returned. 831 /// 832 /// \headerfile <x86intrin.h> 833 /// 834 /// This intrinsic corresponds to the \c PSLLQ instruction. 835 /// 836 /// \param __m 837 /// A 64-bit integer vector interpreted as a single 64-bit integer. 838 /// \param __count 839 /// A 32-bit integer value. 840 /// \returns A 64-bit integer vector containing the left-shifted value. If 841 /// __count is greater or equal to 64, the result is set to 0. 
842 static __inline__ __m64 __DEFAULT_FN_ATTRS 843 _mm_slli_si64(__m64 __m, int __count) 844 { 845 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); 846 } 847 848 /// \brief Right-shifts each 16-bit integer element of the first parameter, 849 /// which is a 64-bit integer vector of [4 x i16], by the number of bits 850 /// specified by the second parameter, which is a 64-bit integer. High-order 851 /// bits are filled with the sign bit of the initial value of each 16-bit 852 /// element. The 16-bit results are packed into a 64-bit integer vector of 853 /// [4 x i16]. 854 /// 855 /// \headerfile <x86intrin.h> 856 /// 857 /// This intrinsic corresponds to the \c PSRAW instruction. 858 /// 859 /// \param __m 860 /// A 64-bit integer vector of [4 x i16]. 861 /// \param __count 862 /// A 64-bit integer vector interpreted as a single 64-bit integer. 863 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 864 /// values. 865 static __inline__ __m64 __DEFAULT_FN_ATTRS 866 _mm_sra_pi16(__m64 __m, __m64 __count) 867 { 868 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 869 } 870 871 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector 872 /// of [4 x i16] by the number of bits specified by a 32-bit integer. 873 /// High-order bits are filled with the sign bit of the initial value of each 874 /// 16-bit element. The 16-bit results are packed into a 64-bit integer 875 /// vector of [4 x i16]. 876 /// 877 /// \headerfile <x86intrin.h> 878 /// 879 /// This intrinsic corresponds to the \c PSRAW instruction. 880 /// 881 /// \param __m 882 /// A 64-bit integer vector of [4 x i16]. 883 /// \param __count 884 /// A 32-bit integer value. 885 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 886 /// values. 
887 static __inline__ __m64 __DEFAULT_FN_ATTRS 888 _mm_srai_pi16(__m64 __m, int __count) 889 { 890 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 891 } 892 893 /// \brief Right-shifts each 32-bit integer element of the first parameter, 894 /// which is a 64-bit integer vector of [2 x i32], by the number of bits 895 /// specified by the second parameter, which is a 64-bit integer. High-order 896 /// bits are filled with the sign bit of the initial value of each 32-bit 897 /// element. The 32-bit results are packed into a 64-bit integer vector of 898 /// [2 x i32]. 899 /// 900 /// \headerfile <x86intrin.h> 901 /// 902 /// This intrinsic corresponds to the \c PSRAD instruction. 903 /// 904 /// \param __m 905 /// A 64-bit integer vector of [2 x i32]. 906 /// \param __count 907 /// A 64-bit integer vector interpreted as a single 64-bit integer. 908 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 909 /// values. 910 static __inline__ __m64 __DEFAULT_FN_ATTRS 911 _mm_sra_pi32(__m64 __m, __m64 __count) 912 { 913 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 914 } 915 916 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector 917 /// of [2 x i32] by the number of bits specified by a 32-bit integer. 918 /// High-order bits are filled with the sign bit of the initial value of each 919 /// 32-bit element. The 32-bit results are packed into a 64-bit integer 920 /// vector of [2 x i32]. 921 /// 922 /// \headerfile <x86intrin.h> 923 /// 924 /// This intrinsic corresponds to the \c PSRAD instruction. 925 /// 926 /// \param __m 927 /// A 64-bit integer vector of [2 x i32]. 928 /// \param __count 929 /// A 32-bit integer value. 930 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 931 /// values. 
932 static __inline__ __m64 __DEFAULT_FN_ATTRS 933 _mm_srai_pi32(__m64 __m, int __count) 934 { 935 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 936 } 937 938 /// \brief Right-shifts each 16-bit integer element of the first parameter, 939 /// which is a 64-bit integer vector of [4 x i16], by the number of bits 940 /// specified by the second parameter, which is a 64-bit integer. High-order 941 /// bits are cleared. The 16-bit results are packed into a 64-bit integer 942 /// vector of [4 x i16]. 943 /// 944 /// \headerfile <x86intrin.h> 945 /// 946 /// This intrinsic corresponds to the \c PSRLW instruction. 947 /// 948 /// \param __m 949 /// A 64-bit integer vector of [4 x i16]. 950 /// \param __count 951 /// A 64-bit integer vector interpreted as a single 64-bit integer. 952 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 953 /// values. 954 static __inline__ __m64 __DEFAULT_FN_ATTRS 955 _mm_srl_pi16(__m64 __m, __m64 __count) 956 { 957 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 958 } 959 960 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector 961 /// of [4 x i16] by the number of bits specified by a 32-bit integer. 962 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 963 /// integer vector of [4 x i16]. 964 /// 965 /// \headerfile <x86intrin.h> 966 /// 967 /// This intrinsic corresponds to the \c PSRLW instruction. 968 /// 969 /// \param __m 970 /// A 64-bit integer vector of [4 x i16]. 971 /// \param __count 972 /// A 32-bit integer value. 973 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 974 /// values. 
975 static __inline__ __m64 __DEFAULT_FN_ATTRS 976 _mm_srli_pi16(__m64 __m, int __count) 977 { 978 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 979 } 980 981 /// \brief Right-shifts each 32-bit integer element of the first parameter, 982 /// which is a 64-bit integer vector of [2 x i32], by the number of bits 983 /// specified by the second parameter, which is a 64-bit integer. High-order 984 /// bits are cleared. The 32-bit results are packed into a 64-bit integer 985 /// vector of [2 x i32]. 986 /// 987 /// \headerfile <x86intrin.h> 988 /// 989 /// This intrinsic corresponds to the \c PSRLD instruction. 990 /// 991 /// \param __m 992 /// A 64-bit integer vector of [2 x i32]. 993 /// \param __count 994 /// A 64-bit integer vector interpreted as a single 64-bit integer. 995 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 996 /// values. 997 static __inline__ __m64 __DEFAULT_FN_ATTRS 998 _mm_srl_pi32(__m64 __m, __m64 __count) 999 { 1000 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 1001 } 1002 1003 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector 1004 /// of [2 x i32] by the number of bits specified by a 32-bit integer. 1005 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1006 /// integer vector of [2 x i32]. 1007 /// 1008 /// \headerfile <x86intrin.h> 1009 /// 1010 /// This intrinsic corresponds to the \c PSRLD instruction. 1011 /// 1012 /// \param __m 1013 /// A 64-bit integer vector of [2 x i32]. 1014 /// \param __count 1015 /// A 32-bit integer value. 1016 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1017 /// values. 
1018 static __inline__ __m64 __DEFAULT_FN_ATTRS 1019 _mm_srli_pi32(__m64 __m, int __count) 1020 { 1021 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 1022 } 1023 1024 /// \brief Right-shifts the first 64-bit integer parameter by the number of bits 1025 /// specified by the second 64-bit integer parameter. High-order bits are 1026 /// cleared. 1027 /// 1028 /// \headerfile <x86intrin.h> 1029 /// 1030 /// This intrinsic corresponds to the \c PSRLQ instruction. 1031 /// 1032 /// \param __m 1033 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1034 /// \param __count 1035 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1036 /// \returns A 64-bit integer vector containing the right-shifted value. 1037 static __inline__ __m64 __DEFAULT_FN_ATTRS 1038 _mm_srl_si64(__m64 __m, __m64 __count) 1039 { 1040 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); 1041 } 1042 1043 /// \brief Right-shifts the first parameter, which is a 64-bit integer, by the 1044 /// number of bits specified by the second parameter, which is a 32-bit 1045 /// integer. High-order bits are cleared. 1046 /// 1047 /// \headerfile <x86intrin.h> 1048 /// 1049 /// This intrinsic corresponds to the \c PSRLQ instruction. 1050 /// 1051 /// \param __m 1052 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1053 /// \param __count 1054 /// A 32-bit integer value. 1055 /// \returns A 64-bit integer vector containing the right-shifted value. 1056 static __inline__ __m64 __DEFAULT_FN_ATTRS 1057 _mm_srli_si64(__m64 __m, int __count) 1058 { 1059 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); 1060 } 1061 1062 /// \brief Performs a bitwise AND of two 64-bit integer vectors. 1063 /// 1064 /// \headerfile <x86intrin.h> 1065 /// 1066 /// This intrinsic corresponds to the \c PAND instruction. 1067 /// 1068 /// \param __m1 1069 /// A 64-bit integer vector. 1070 /// \param __m2 1071 /// A 64-bit integer vector. 
1072 /// \returns A 64-bit integer vector containing the bitwise AND of both 1073 /// parameters. 1074 static __inline__ __m64 __DEFAULT_FN_ATTRS 1075 _mm_and_si64(__m64 __m1, __m64 __m2) 1076 { 1077 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); 1078 } 1079 1080 /// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then 1081 /// performs a bitwise AND of the intermediate result and the second 64-bit 1082 /// integer vector. 1083 /// 1084 /// \headerfile <x86intrin.h> 1085 /// 1086 /// This intrinsic corresponds to the \c PANDN instruction. 1087 /// 1088 /// \param __m1 1089 /// A 64-bit integer vector. The one's complement of this parameter is used 1090 /// in the bitwise AND. 1091 /// \param __m2 1092 /// A 64-bit integer vector. 1093 /// \returns A 64-bit integer vector containing the bitwise AND of the second 1094 /// parameter and the one's complement of the first parameter. 1095 static __inline__ __m64 __DEFAULT_FN_ATTRS 1096 _mm_andnot_si64(__m64 __m1, __m64 __m2) 1097 { 1098 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); 1099 } 1100 1101 /// \brief Performs a bitwise OR of two 64-bit integer vectors. 1102 /// 1103 /// \headerfile <x86intrin.h> 1104 /// 1105 /// This intrinsic corresponds to the \c POR instruction. 1106 /// 1107 /// \param __m1 1108 /// A 64-bit integer vector. 1109 /// \param __m2 1110 /// A 64-bit integer vector. 1111 /// \returns A 64-bit integer vector containing the bitwise OR of both 1112 /// parameters. 1113 static __inline__ __m64 __DEFAULT_FN_ATTRS 1114 _mm_or_si64(__m64 __m1, __m64 __m2) 1115 { 1116 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); 1117 } 1118 1119 /// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors. 1120 /// 1121 /// \headerfile <x86intrin.h> 1122 /// 1123 /// This intrinsic corresponds to the \c PXOR instruction. 1124 /// 1125 /// \param __m1 1126 /// A 64-bit integer vector. 1127 /// \param __m2 1128 /// A 64-bit integer vector. 
1129 /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both 1130 /// parameters. 1131 static __inline__ __m64 __DEFAULT_FN_ATTRS 1132 _mm_xor_si64(__m64 __m1, __m64 __m2) 1133 { 1134 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); 1135 } 1136 1137 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of 1138 /// [8 x i8] to determine if the element of the first vector is equal to the 1139 /// corresponding element of the second vector. The comparison yields 0 for 1140 /// false, 0xFF for true. 1141 /// 1142 /// \headerfile <x86intrin.h> 1143 /// 1144 /// This intrinsic corresponds to the \c PCMPEQB instruction. 1145 /// 1146 /// \param __m1 1147 /// A 64-bit integer vector of [8 x i8]. 1148 /// \param __m2 1149 /// A 64-bit integer vector of [8 x i8]. 1150 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1151 /// results. 1152 static __inline__ __m64 __DEFAULT_FN_ATTRS 1153 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 1154 { 1155 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 1156 } 1157 1158 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of 1159 /// [4 x i16] to determine if the element of the first vector is equal to the 1160 /// corresponding element of the second vector. The comparison yields 0 for 1161 /// false, 0xFFFF for true. 1162 /// 1163 /// \headerfile <x86intrin.h> 1164 /// 1165 /// This intrinsic corresponds to the \c PCMPEQW instruction. 1166 /// 1167 /// \param __m1 1168 /// A 64-bit integer vector of [4 x i16]. 1169 /// \param __m2 1170 /// A 64-bit integer vector of [4 x i16]. 1171 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1172 /// results. 
1173 static __inline__ __m64 __DEFAULT_FN_ATTRS 1174 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 1175 { 1176 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 1177 } 1178 1179 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of 1180 /// [2 x i32] to determine if the element of the first vector is equal to the 1181 /// corresponding element of the second vector. The comparison yields 0 for 1182 /// false, 0xFFFFFFFF for true. 1183 /// 1184 /// \headerfile <x86intrin.h> 1185 /// 1186 /// This intrinsic corresponds to the \c PCMPEQD instruction. 1187 /// 1188 /// \param __m1 1189 /// A 64-bit integer vector of [2 x i32]. 1190 /// \param __m2 1191 /// A 64-bit integer vector of [2 x i32]. 1192 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1193 /// results. 1194 static __inline__ __m64 __DEFAULT_FN_ATTRS 1195 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 1196 { 1197 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 1198 } 1199 1200 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of 1201 /// [8 x i8] to determine if the element of the first vector is greater than 1202 /// the corresponding element of the second vector. The comparison yields 0 1203 /// for false, 0xFF for true. 1204 /// 1205 /// \headerfile <x86intrin.h> 1206 /// 1207 /// This intrinsic corresponds to the \c PCMPGTB instruction. 1208 /// 1209 /// \param __m1 1210 /// A 64-bit integer vector of [8 x i8]. 1211 /// \param __m2 1212 /// A 64-bit integer vector of [8 x i8]. 1213 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1214 /// results. 
1215 static __inline__ __m64 __DEFAULT_FN_ATTRS 1216 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 1217 { 1218 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 1219 } 1220 1221 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of 1222 /// [4 x i16] to determine if the element of the first vector is greater than 1223 /// the corresponding element of the second vector. The comparison yields 0 1224 /// for false, 0xFFFF for true. 1225 /// 1226 /// \headerfile <x86intrin.h> 1227 /// 1228 /// This intrinsic corresponds to the \c PCMPGTW instruction. 1229 /// 1230 /// \param __m1 1231 /// A 64-bit integer vector of [4 x i16]. 1232 /// \param __m2 1233 /// A 64-bit integer vector of [4 x i16]. 1234 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1235 /// results. 1236 static __inline__ __m64 __DEFAULT_FN_ATTRS 1237 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 1238 { 1239 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 1240 } 1241 1242 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of 1243 /// [2 x i32] to determine if the element of the first vector is greater than 1244 /// the corresponding element of the second vector. The comparison yields 0 1245 /// for false, 0xFFFFFFFF for true. 1246 /// 1247 /// \headerfile <x86intrin.h> 1248 /// 1249 /// This intrinsic corresponds to the \c PCMPGTD instruction. 1250 /// 1251 /// \param __m1 1252 /// A 64-bit integer vector of [2 x i32]. 1253 /// \param __m2 1254 /// A 64-bit integer vector of [2 x i32]. 1255 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1256 /// results. 1257 static __inline__ __m64 __DEFAULT_FN_ATTRS 1258 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 1259 { 1260 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 1261 } 1262 1263 /// \brief Constructs a 64-bit integer vector initialized to zero. 
1264 /// 1265 /// \headerfile <x86intrin.h> 1266 /// 1267 /// This intrinsic corresponds to the the \c VXORPS / XORPS instruction. 1268 /// 1269 /// \returns An initialized 64-bit integer vector with all elements set to zero. 1270 static __inline__ __m64 __DEFAULT_FN_ATTRS 1271 _mm_setzero_si64(void) 1272 { 1273 return (__m64){ 0LL }; 1274 } 1275 1276 /// \brief Constructs a 64-bit integer vector initialized with the specified 1277 /// 32-bit integer values. 1278 /// 1279 /// \headerfile <x86intrin.h> 1280 /// 1281 /// This intrinsic is a utility function and does not correspond to a specific 1282 /// instruction. 1283 /// 1284 /// \param __i1 1285 /// A 32-bit integer value used to initialize the upper 32 bits of the 1286 /// result. 1287 /// \param __i0 1288 /// A 32-bit integer value used to initialize the lower 32 bits of the 1289 /// result. 1290 /// \returns An initialized 64-bit integer vector. 1291 static __inline__ __m64 __DEFAULT_FN_ATTRS 1292 _mm_set_pi32(int __i1, int __i0) 1293 { 1294 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 1295 } 1296 1297 /// \brief Constructs a 64-bit integer vector initialized with the specified 1298 /// 16-bit integer values. 1299 /// 1300 /// \headerfile <x86intrin.h> 1301 /// 1302 /// This intrinsic is a utility function and does not correspond to a specific 1303 /// instruction. 1304 /// 1305 /// \param __s3 1306 /// A 16-bit integer value used to initialize bits [63:48] of the result. 1307 /// \param __s2 1308 /// A 16-bit integer value used to initialize bits [47:32] of the result. 1309 /// \param __s1 1310 /// A 16-bit integer value used to initialize bits [31:16] of the result. 1311 /// \param __s0 1312 /// A 16-bit integer value used to initialize bits [15:0] of the result. 1313 /// \returns An initialized 64-bit integer vector. 
1314 static __inline__ __m64 __DEFAULT_FN_ATTRS 1315 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 1316 { 1317 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 1318 } 1319 1320 /// \brief Constructs a 64-bit integer vector initialized with the specified 1321 /// 8-bit integer values. 1322 /// 1323 /// \headerfile <x86intrin.h> 1324 /// 1325 /// This intrinsic is a utility function and does not correspond to a specific 1326 /// instruction. 1327 /// 1328 /// \param __b7 1329 /// An 8-bit integer value used to initialize bits [63:56] of the result. 1330 /// \param __b6 1331 /// An 8-bit integer value used to initialize bits [55:48] of the result. 1332 /// \param __b5 1333 /// An 8-bit integer value used to initialize bits [47:40] of the result. 1334 /// \param __b4 1335 /// An 8-bit integer value used to initialize bits [39:32] of the result. 1336 /// \param __b3 1337 /// An 8-bit integer value used to initialize bits [31:24] of the result. 1338 /// \param __b2 1339 /// An 8-bit integer value used to initialize bits [23:16] of the result. 1340 /// \param __b1 1341 /// An 8-bit integer value used to initialize bits [15:8] of the result. 1342 /// \param __b0 1343 /// An 8-bit integer value used to initialize bits [7:0] of the result. 1344 /// \returns An initialized 64-bit integer vector. 1345 static __inline__ __m64 __DEFAULT_FN_ATTRS 1346 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 1347 char __b1, char __b0) 1348 { 1349 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 1350 __b4, __b5, __b6, __b7); 1351 } 1352 1353 /// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the 1354 /// 32-bit integer vector elements set to the specified 32-bit integer 1355 /// value. 1356 /// 1357 /// \headerfile <x86intrin.h> 1358 /// 1359 /// This intrinsic corresponds to the \c VPSHUFD / PSHUFD instruction. 
1360 /// 1361 /// \param __i 1362 /// A 32-bit integer value used to initialize each vector element of the 1363 /// result. 1364 /// \returns An initialized 64-bit integer vector of [2 x i32]. 1365 static __inline__ __m64 __DEFAULT_FN_ATTRS 1366 _mm_set1_pi32(int __i) 1367 { 1368 return _mm_set_pi32(__i, __i); 1369 } 1370 1371 /// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the 1372 /// 16-bit integer vector elements set to the specified 16-bit integer 1373 /// value. 1374 /// 1375 /// \headerfile <x86intrin.h> 1376 /// 1377 /// This intrinsic corresponds to the \c VPSHUFLW / PSHUFLW instruction. 1378 /// 1379 /// \param __w 1380 /// A 16-bit integer value used to initialize each vector element of the 1381 /// result. 1382 /// \returns An initialized 64-bit integer vector of [4 x i16]. 1383 static __inline__ __m64 __DEFAULT_FN_ATTRS 1384 _mm_set1_pi16(short __w) 1385 { 1386 return _mm_set_pi16(__w, __w, __w, __w); 1387 } 1388 1389 /// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the 1390 /// 8-bit integer vector elements set to the specified 8-bit integer value. 1391 /// 1392 /// \headerfile <x86intrin.h> 1393 /// 1394 /// This intrinsic corresponds to the \c VPUNPCKLBW + VPSHUFLW / \c PUNPCKLBW + 1395 /// PSHUFLW instruction. 1396 /// 1397 /// \param __b 1398 /// An 8-bit integer value used to initialize each vector element of the 1399 /// result. 1400 /// \returns An initialized 64-bit integer vector of [8 x i8]. 1401 static __inline__ __m64 __DEFAULT_FN_ATTRS 1402 _mm_set1_pi8(char __b) 1403 { 1404 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 1405 } 1406 1407 /// \brief Constructs a 64-bit integer vector, initialized in reverse order with 1408 /// the specified 32-bit integer values. 1409 /// 1410 /// \headerfile <x86intrin.h> 1411 /// 1412 /// This intrinsic is a utility function and does not correspond to a specific 1413 /// instruction. 
1414 /// 1415 /// \param __i0 1416 /// A 32-bit integer value used to initialize the lower 32 bits of the 1417 /// result. 1418 /// \param __i1 1419 /// A 32-bit integer value used to initialize the upper 32 bits of the 1420 /// result. 1421 /// \returns An initialized 64-bit integer vector. 1422 static __inline__ __m64 __DEFAULT_FN_ATTRS 1423 _mm_setr_pi32(int __i0, int __i1) 1424 { 1425 return _mm_set_pi32(__i1, __i0); 1426 } 1427 1428 /// \brief Constructs a 64-bit integer vector, initialized in reverse order with 1429 /// the specified 16-bit integer values. 1430 /// 1431 /// \headerfile <x86intrin.h> 1432 /// 1433 /// This intrinsic is a utility function and does not correspond to a specific 1434 /// instruction. 1435 /// 1436 /// \param __w0 1437 /// A 16-bit integer value used to initialize bits [15:0] of the result. 1438 /// \param __w1 1439 /// A 16-bit integer value used to initialize bits [31:16] of the result. 1440 /// \param __w2 1441 /// A 16-bit integer value used to initialize bits [47:32] of the result. 1442 /// \param __w3 1443 /// A 16-bit integer value used to initialize bits [63:48] of the result. 1444 /// \returns An initialized 64-bit integer vector. 1445 static __inline__ __m64 __DEFAULT_FN_ATTRS 1446 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 1447 { 1448 return _mm_set_pi16(__w3, __w2, __w1, __w0); 1449 } 1450 1451 /// \brief Constructs a 64-bit integer vector, initialized in reverse order with 1452 /// the specified 8-bit integer values. 1453 /// 1454 /// \headerfile <x86intrin.h> 1455 /// 1456 /// This intrinsic is a utility function and does not correspond to a specific 1457 /// instruction. 1458 /// 1459 /// \param __b0 1460 /// An 8-bit integer value used to initialize bits [7:0] of the result. 1461 /// \param __b1 1462 /// An 8-bit integer value used to initialize bits [15:8] of the result. 1463 /// \param __b2 1464 /// An 8-bit integer value used to initialize bits [23:16] of the result. 
1465 /// \param __b3 1466 /// An 8-bit integer value used to initialize bits [31:24] of the result. 1467 /// \param __b4 1468 /// An 8-bit integer value used to initialize bits [39:32] of the result. 1469 /// \param __b5 1470 /// An 8-bit integer value used to initialize bits [47:40] of the result. 1471 /// \param __b6 1472 /// An 8-bit integer value used to initialize bits [55:48] of the result. 1473 /// \param __b7 1474 /// An 8-bit integer value used to initialize bits [63:56] of the result. 1475 /// \returns An initialized 64-bit integer vector. 1476 static __inline__ __m64 __DEFAULT_FN_ATTRS 1477 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 1478 char __b6, char __b7) 1479 { 1480 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 1481 } 1482 1483 #undef __DEFAULT_FN_ATTRS 1484 1485 /* Aliases for compatibility. */ 1486 #define _m_empty _mm_empty 1487 #define _m_from_int _mm_cvtsi32_si64 1488 #define _m_from_int64 _mm_cvtsi64_m64 1489 #define _m_to_int _mm_cvtsi64_si32 1490 #define _m_to_int64 _mm_cvtm64_si64 1491 #define _m_packsswb _mm_packs_pi16 1492 #define _m_packssdw _mm_packs_pi32 1493 #define _m_packuswb _mm_packs_pu16 1494 #define _m_punpckhbw _mm_unpackhi_pi8 1495 #define _m_punpckhwd _mm_unpackhi_pi16 1496 #define _m_punpckhdq _mm_unpackhi_pi32 1497 #define _m_punpcklbw _mm_unpacklo_pi8 1498 #define _m_punpcklwd _mm_unpacklo_pi16 1499 #define _m_punpckldq _mm_unpacklo_pi32 1500 #define _m_paddb _mm_add_pi8 1501 #define _m_paddw _mm_add_pi16 1502 #define _m_paddd _mm_add_pi32 1503 #define _m_paddsb _mm_adds_pi8 1504 #define _m_paddsw _mm_adds_pi16 1505 #define _m_paddusb _mm_adds_pu8 1506 #define _m_paddusw _mm_adds_pu16 1507 #define _m_psubb _mm_sub_pi8 1508 #define _m_psubw _mm_sub_pi16 1509 #define _m_psubd _mm_sub_pi32 1510 #define _m_psubsb _mm_subs_pi8 1511 #define _m_psubsw _mm_subs_pi16 1512 #define _m_psubusb _mm_subs_pu8 1513 #define _m_psubusw _mm_subs_pu16 1514 #define _m_pmaddwd 
_mm_madd_pi16 1515 #define _m_pmulhw _mm_mulhi_pi16 1516 #define _m_pmullw _mm_mullo_pi16 1517 #define _m_psllw _mm_sll_pi16 1518 #define _m_psllwi _mm_slli_pi16 1519 #define _m_pslld _mm_sll_pi32 1520 #define _m_pslldi _mm_slli_pi32 1521 #define _m_psllq _mm_sll_si64 1522 #define _m_psllqi _mm_slli_si64 1523 #define _m_psraw _mm_sra_pi16 1524 #define _m_psrawi _mm_srai_pi16 1525 #define _m_psrad _mm_sra_pi32 1526 #define _m_psradi _mm_srai_pi32 1527 #define _m_psrlw _mm_srl_pi16 1528 #define _m_psrlwi _mm_srli_pi16 1529 #define _m_psrld _mm_srl_pi32 1530 #define _m_psrldi _mm_srli_pi32 1531 #define _m_psrlq _mm_srl_si64 1532 #define _m_psrlqi _mm_srli_si64 1533 #define _m_pand _mm_and_si64 1534 #define _m_pandn _mm_andnot_si64 1535 #define _m_por _mm_or_si64 1536 #define _m_pxor _mm_xor_si64 1537 #define _m_pcmpeqb _mm_cmpeq_pi8 1538 #define _m_pcmpeqw _mm_cmpeq_pi16 1539 #define _m_pcmpeqd _mm_cmpeq_pi32 1540 #define _m_pcmpgtb _mm_cmpgt_pi8 1541 #define _m_pcmpgtw _mm_cmpgt_pi16 1542 #define _m_pcmpgtd _mm_cmpgt_pi32 1543 1544 #endif /* __MMINTRIN_H */ 1545 1546