/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __MMINTRIN_H
#define __MMINTRIN_H

/* Public 64-bit vector type taken and returned by the intrinsics in this
 * file; declared as an 8-byte vector with long long elements. */
typedef long long __m64 __attribute__((__vector_size__(8)));

/* Internal 8-byte vector types with long long, int, short and char elements.
 * The intrinsics in this file cast __m64 values to one of these before
 * handing them to a builtin. */
typedef long long __v1di __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))

/// \brief Clears the MMX state by setting the state of the x87 stack registers
///    to empty.
39 /// 40 /// \headerfile <x86intrin.h> 41 /// 42 /// This intrinsic corresponds to the <c> EMMS </c> instruction. 43 /// 44 static __inline__ void __DEFAULT_FN_ATTRS 45 _mm_empty(void) 46 { 47 __builtin_ia32_emms(); 48 } 49 50 /// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the 51 /// value of the 32-bit integer parameter and setting the upper 32 bits to 0. 52 /// 53 /// \headerfile <x86intrin.h> 54 /// 55 /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 56 /// 57 /// \param __i 58 /// A 32-bit integer value. 59 /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the 60 /// parameter. The upper 32 bits are set to 0. 61 static __inline__ __m64 __DEFAULT_FN_ATTRS 62 _mm_cvtsi32_si64(int __i) 63 { 64 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 65 } 66 67 /// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit 68 /// signed integer. 69 /// 70 /// \headerfile <x86intrin.h> 71 /// 72 /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 73 /// 74 /// \param __m 75 /// A 64-bit integer vector. 76 /// \returns A 32-bit signed integer value containing the lower 32 bits of the 77 /// parameter. 78 static __inline__ int __DEFAULT_FN_ATTRS 79 _mm_cvtsi64_si32(__m64 __m) 80 { 81 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 82 } 83 84 /// \brief Casts a 64-bit signed integer value into a 64-bit integer vector. 85 /// 86 /// \headerfile <x86intrin.h> 87 /// 88 /// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction. 89 /// 90 /// \param __i 91 /// A 64-bit signed integer. 92 /// \returns A 64-bit integer vector containing the same bitwise pattern as the 93 /// parameter. 94 static __inline__ __m64 __DEFAULT_FN_ATTRS 95 _mm_cvtsi64_m64(long long __i) 96 { 97 return (__m64)__i; 98 } 99 100 /// \brief Casts a 64-bit integer vector into a 64-bit signed integer value. 
101 /// 102 /// \headerfile <x86intrin.h> 103 /// 104 /// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction. 105 /// 106 /// \param __m 107 /// A 64-bit integer vector. 108 /// \returns A 64-bit signed integer containing the same bitwise pattern as the 109 /// parameter. 110 static __inline__ long long __DEFAULT_FN_ATTRS 111 _mm_cvtm64_si64(__m64 __m) 112 { 113 return (long long)__m; 114 } 115 116 /// \brief Converts 16-bit signed integers from both 64-bit integer vector 117 /// parameters of [4 x i16] into 8-bit signed integer values, and constructs 118 /// a 64-bit integer vector of [8 x i8] as the result. Positive values 119 /// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 120 /// are saturated to 0x80. 121 /// 122 /// \headerfile <x86intrin.h> 123 /// 124 /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction. 125 /// 126 /// \param __m1 127 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 128 /// 16-bit signed integer and is converted to an 8-bit signed integer with 129 /// saturation. Positive values greater than 0x7F are saturated to 0x7F. 130 /// Negative values less than 0x80 are saturated to 0x80. The converted 131 /// [4 x i8] values are written to the lower 32 bits of the result. 132 /// \param __m2 133 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 134 /// 16-bit signed integer and is converted to an 8-bit signed integer with 135 /// saturation. Positive values greater than 0x7F are saturated to 0x7F. 136 /// Negative values less than 0x80 are saturated to 0x80. The converted 137 /// [4 x i8] values are written to the upper 32 bits of the result. 138 /// \returns A 64-bit integer vector of [8 x i8] containing the converted 139 /// values. 
140 static __inline__ __m64 __DEFAULT_FN_ATTRS 141 _mm_packs_pi16(__m64 __m1, __m64 __m2) 142 { 143 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 144 } 145 146 /// \brief Converts 32-bit signed integers from both 64-bit integer vector 147 /// parameters of [2 x i32] into 16-bit signed integer values, and constructs 148 /// a 64-bit integer vector of [4 x i16] as the result. Positive values 149 /// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than 150 /// 0x8000 are saturated to 0x8000. 151 /// 152 /// \headerfile <x86intrin.h> 153 /// 154 /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction. 155 /// 156 /// \param __m1 157 /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 158 /// 32-bit signed integer and is converted to a 16-bit signed integer with 159 /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 160 /// Negative values less than 0x8000 are saturated to 0x8000. The converted 161 /// [2 x i16] values are written to the lower 32 bits of the result. 162 /// \param __m2 163 /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 164 /// 32-bit signed integer and is converted to a 16-bit signed integer with 165 /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 166 /// Negative values less than 0x8000 are saturated to 0x8000. The converted 167 /// [2 x i16] values are written to the upper 32 bits of the result. 168 /// \returns A 64-bit integer vector of [4 x i16] containing the converted 169 /// values. 170 static __inline__ __m64 __DEFAULT_FN_ATTRS 171 _mm_packs_pi32(__m64 __m1, __m64 __m2) 172 { 173 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 174 } 175 176 /// \brief Converts 16-bit signed integers from both 64-bit integer vector 177 /// parameters of [4 x i16] into 8-bit unsigned integer values, and 178 /// constructs a 64-bit integer vector of [8 x i8] as the result. 
Values 179 /// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated 180 /// to 0. 181 /// 182 /// \headerfile <x86intrin.h> 183 /// 184 /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction. 185 /// 186 /// \param __m1 187 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 188 /// 16-bit signed integer and is converted to an 8-bit unsigned integer with 189 /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 190 /// than 0 are saturated to 0. The converted [4 x i8] values are written to 191 /// the lower 32 bits of the result. 192 /// \param __m2 193 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 194 /// 16-bit signed integer and is converted to an 8-bit unsigned integer with 195 /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 196 /// than 0 are saturated to 0. The converted [4 x i8] values are written to 197 /// the upper 32 bits of the result. 198 /// \returns A 64-bit integer vector of [8 x i8] containing the converted 199 /// values. 200 static __inline__ __m64 __DEFAULT_FN_ATTRS 201 _mm_packs_pu16(__m64 __m1, __m64 __m2) 202 { 203 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 204 } 205 206 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] 207 /// and interleaves them into a 64-bit integer vector of [8 x i8]. 208 /// 209 /// \headerfile <x86intrin.h> 210 /// 211 /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. 212 /// 213 /// \param __m1 214 /// A 64-bit integer vector of [8 x i8]. \n 215 /// Bits [39:32] are written to bits [7:0] of the result. \n 216 /// Bits [47:40] are written to bits [23:16] of the result. \n 217 /// Bits [55:48] are written to bits [39:32] of the result. \n 218 /// Bits [63:56] are written to bits [55:48] of the result. 219 /// \param __m2 220 /// A 64-bit integer vector of [8 x i8]. 
221 /// Bits [39:32] are written to bits [15:8] of the result. \n 222 /// Bits [47:40] are written to bits [31:24] of the result. \n 223 /// Bits [55:48] are written to bits [47:40] of the result. \n 224 /// Bits [63:56] are written to bits [63:56] of the result. 225 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 226 /// values. 227 static __inline__ __m64 __DEFAULT_FN_ATTRS 228 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 229 { 230 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 231 } 232 233 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of 234 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 235 /// 236 /// \headerfile <x86intrin.h> 237 /// 238 /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction. 239 /// 240 /// \param __m1 241 /// A 64-bit integer vector of [4 x i16]. 242 /// Bits [47:32] are written to bits [15:0] of the result. \n 243 /// Bits [63:48] are written to bits [47:32] of the result. 244 /// \param __m2 245 /// A 64-bit integer vector of [4 x i16]. 246 /// Bits [47:32] are written to bits [31:16] of the result. \n 247 /// Bits [63:48] are written to bits [63:48] of the result. 248 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 249 /// values. 250 static __inline__ __m64 __DEFAULT_FN_ATTRS 251 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 252 { 253 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 254 } 255 256 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of 257 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 258 /// 259 /// \headerfile <x86intrin.h> 260 /// 261 /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction. 262 /// 263 /// \param __m1 264 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 265 /// the lower 32 bits of the result. 266 /// \param __m2 267 /// A 64-bit integer vector of [2 x i32]. 
The upper 32 bits are written to 268 /// the upper 32 bits of the result. 269 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 270 /// values. 271 static __inline__ __m64 __DEFAULT_FN_ATTRS 272 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 273 { 274 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 275 } 276 277 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] 278 /// and interleaves them into a 64-bit integer vector of [8 x i8]. 279 /// 280 /// \headerfile <x86intrin.h> 281 /// 282 /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction. 283 /// 284 /// \param __m1 285 /// A 64-bit integer vector of [8 x i8]. 286 /// Bits [7:0] are written to bits [7:0] of the result. \n 287 /// Bits [15:8] are written to bits [23:16] of the result. \n 288 /// Bits [23:16] are written to bits [39:32] of the result. \n 289 /// Bits [31:24] are written to bits [55:48] of the result. 290 /// \param __m2 291 /// A 64-bit integer vector of [8 x i8]. 292 /// Bits [7:0] are written to bits [15:8] of the result. \n 293 /// Bits [15:8] are written to bits [31:24] of the result. \n 294 /// Bits [23:16] are written to bits [47:40] of the result. \n 295 /// Bits [31:24] are written to bits [63:56] of the result. 296 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 297 /// values. 298 static __inline__ __m64 __DEFAULT_FN_ATTRS 299 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 300 { 301 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 302 } 303 304 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of 305 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 306 /// 307 /// \headerfile <x86intrin.h> 308 /// 309 /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction. 310 /// 311 /// \param __m1 312 /// A 64-bit integer vector of [4 x i16]. 313 /// Bits [15:0] are written to bits [15:0] of the result. 
\n 314 /// Bits [31:16] are written to bits [47:32] of the result. 315 /// \param __m2 316 /// A 64-bit integer vector of [4 x i16]. 317 /// Bits [15:0] are written to bits [31:16] of the result. \n 318 /// Bits [31:16] are written to bits [63:48] of the result. 319 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 320 /// values. 321 static __inline__ __m64 __DEFAULT_FN_ATTRS 322 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 323 { 324 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 325 } 326 327 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of 328 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 329 /// 330 /// \headerfile <x86intrin.h> 331 /// 332 /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction. 333 /// 334 /// \param __m1 335 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 336 /// the lower 32 bits of the result. 337 /// \param __m2 338 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 339 /// the upper 32 bits of the result. 340 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 341 /// values. 342 static __inline__ __m64 __DEFAULT_FN_ATTRS 343 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 344 { 345 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 346 } 347 348 /// \brief Adds each 8-bit integer element of the first 64-bit integer vector 349 /// of [8 x i8] to the corresponding 8-bit integer element of the second 350 /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are 351 /// packed into a 64-bit integer vector of [8 x i8]. 352 /// 353 /// \headerfile <x86intrin.h> 354 /// 355 /// This intrinsic corresponds to the <c> PADDB </c> instruction. 356 /// 357 /// \param __m1 358 /// A 64-bit integer vector of [8 x i8]. 359 /// \param __m2 360 /// A 64-bit integer vector of [8 x i8]. 
361 /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both 362 /// parameters. 363 static __inline__ __m64 __DEFAULT_FN_ATTRS 364 _mm_add_pi8(__m64 __m1, __m64 __m2) 365 { 366 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 367 } 368 369 /// \brief Adds each 16-bit integer element of the first 64-bit integer vector 370 /// of [4 x i16] to the corresponding 16-bit integer element of the second 371 /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are 372 /// packed into a 64-bit integer vector of [4 x i16]. 373 /// 374 /// \headerfile <x86intrin.h> 375 /// 376 /// This intrinsic corresponds to the <c> PADDW </c> instruction. 377 /// 378 /// \param __m1 379 /// A 64-bit integer vector of [4 x i16]. 380 /// \param __m2 381 /// A 64-bit integer vector of [4 x i16]. 382 /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both 383 /// parameters. 384 static __inline__ __m64 __DEFAULT_FN_ATTRS 385 _mm_add_pi16(__m64 __m1, __m64 __m2) 386 { 387 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 388 } 389 390 /// \brief Adds each 32-bit integer element of the first 64-bit integer vector 391 /// of [2 x i32] to the corresponding 32-bit integer element of the second 392 /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are 393 /// packed into a 64-bit integer vector of [2 x i32]. 394 /// 395 /// \headerfile <x86intrin.h> 396 /// 397 /// This intrinsic corresponds to the <c> PADDD </c> instruction. 398 /// 399 /// \param __m1 400 /// A 64-bit integer vector of [2 x i32]. 401 /// \param __m2 402 /// A 64-bit integer vector of [2 x i32]. 403 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both 404 /// parameters. 
405 static __inline__ __m64 __DEFAULT_FN_ATTRS 406 _mm_add_pi32(__m64 __m1, __m64 __m2) 407 { 408 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 409 } 410 411 /// \brief Adds each 8-bit signed integer element of the first 64-bit integer 412 /// vector of [8 x i8] to the corresponding 8-bit signed integer element of 413 /// the second 64-bit integer vector of [8 x i8]. Positive sums greater than 414 /// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to 415 /// 0x80. The results are packed into a 64-bit integer vector of [8 x i8]. 416 /// 417 /// \headerfile <x86intrin.h> 418 /// 419 /// This intrinsic corresponds to the <c> PADDSB </c> instruction. 420 /// 421 /// \param __m1 422 /// A 64-bit integer vector of [8 x i8]. 423 /// \param __m2 424 /// A 64-bit integer vector of [8 x i8]. 425 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums 426 /// of both parameters. 427 static __inline__ __m64 __DEFAULT_FN_ATTRS 428 _mm_adds_pi8(__m64 __m1, __m64 __m2) 429 { 430 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 431 } 432 433 /// \brief Adds each 16-bit signed integer element of the first 64-bit integer 434 /// vector of [4 x i16] to the corresponding 16-bit signed integer element of 435 /// the second 64-bit integer vector of [4 x i16]. Positive sums greater than 436 /// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are 437 /// saturated to 0x8000. The results are packed into a 64-bit integer vector 438 /// of [4 x i16]. 439 /// 440 /// \headerfile <x86intrin.h> 441 /// 442 /// This intrinsic corresponds to the <c> PADDSW </c> instruction. 443 /// 444 /// \param __m1 445 /// A 64-bit integer vector of [4 x i16]. 446 /// \param __m2 447 /// A 64-bit integer vector of [4 x i16]. 448 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums 449 /// of both parameters. 
450 static __inline__ __m64 __DEFAULT_FN_ATTRS 451 _mm_adds_pi16(__m64 __m1, __m64 __m2) 452 { 453 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 454 } 455 456 /// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer 457 /// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of 458 /// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are 459 /// saturated to 0xFF. The results are packed into a 64-bit integer vector of 460 /// [8 x i8]. 461 /// 462 /// \headerfile <x86intrin.h> 463 /// 464 /// This intrinsic corresponds to the <c> PADDUSB </c> instruction. 465 /// 466 /// \param __m1 467 /// A 64-bit integer vector of [8 x i8]. 468 /// \param __m2 469 /// A 64-bit integer vector of [8 x i8]. 470 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 471 /// unsigned sums of both parameters. 472 static __inline__ __m64 __DEFAULT_FN_ATTRS 473 _mm_adds_pu8(__m64 __m1, __m64 __m2) 474 { 475 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 476 } 477 478 /// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer 479 /// vector of [4 x i16] to the corresponding 16-bit unsigned integer element 480 /// of the second 64-bit integer vector of [4 x i16]. Sums greater than 481 /// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit 482 /// integer vector of [4 x i16]. 483 /// 484 /// \headerfile <x86intrin.h> 485 /// 486 /// This intrinsic corresponds to the <c> PADDUSW </c> instruction. 487 /// 488 /// \param __m1 489 /// A 64-bit integer vector of [4 x i16]. 490 /// \param __m2 491 /// A 64-bit integer vector of [4 x i16]. 492 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 493 /// unsigned sums of both parameters. 
494 static __inline__ __m64 __DEFAULT_FN_ATTRS 495 _mm_adds_pu16(__m64 __m1, __m64 __m2) 496 { 497 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 498 } 499 500 /// \brief Subtracts each 8-bit integer element of the second 64-bit integer 501 /// vector of [8 x i8] from the corresponding 8-bit integer element of the 502 /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results 503 /// are packed into a 64-bit integer vector of [8 x i8]. 504 /// 505 /// \headerfile <x86intrin.h> 506 /// 507 /// This intrinsic corresponds to the <c> PSUBB </c> instruction. 508 /// 509 /// \param __m1 510 /// A 64-bit integer vector of [8 x i8] containing the minuends. 511 /// \param __m2 512 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 513 /// \returns A 64-bit integer vector of [8 x i8] containing the differences of 514 /// both parameters. 515 static __inline__ __m64 __DEFAULT_FN_ATTRS 516 _mm_sub_pi8(__m64 __m1, __m64 __m2) 517 { 518 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 519 } 520 521 /// \brief Subtracts each 16-bit integer element of the second 64-bit integer 522 /// vector of [4 x i16] from the corresponding 16-bit integer element of the 523 /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the 524 /// results are packed into a 64-bit integer vector of [4 x i16]. 525 /// 526 /// \headerfile <x86intrin.h> 527 /// 528 /// This intrinsic corresponds to the <c> PSUBW </c> instruction. 529 /// 530 /// \param __m1 531 /// A 64-bit integer vector of [4 x i16] containing the minuends. 532 /// \param __m2 533 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 534 /// \returns A 64-bit integer vector of [4 x i16] containing the differences of 535 /// both parameters. 
536 static __inline__ __m64 __DEFAULT_FN_ATTRS 537 _mm_sub_pi16(__m64 __m1, __m64 __m2) 538 { 539 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 540 } 541 542 /// \brief Subtracts each 32-bit integer element of the second 64-bit integer 543 /// vector of [2 x i32] from the corresponding 32-bit integer element of the 544 /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the 545 /// results are packed into a 64-bit integer vector of [2 x i32]. 546 /// 547 /// \headerfile <x86intrin.h> 548 /// 549 /// This intrinsic corresponds to the <c> PSUBD </c> instruction. 550 /// 551 /// \param __m1 552 /// A 64-bit integer vector of [2 x i32] containing the minuends. 553 /// \param __m2 554 /// A 64-bit integer vector of [2 x i32] containing the subtrahends. 555 /// \returns A 64-bit integer vector of [2 x i32] containing the differences of 556 /// both parameters. 557 static __inline__ __m64 __DEFAULT_FN_ATTRS 558 _mm_sub_pi32(__m64 __m1, __m64 __m2) 559 { 560 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 561 } 562 563 /// \brief Subtracts each 8-bit signed integer element of the second 64-bit 564 /// integer vector of [8 x i8] from the corresponding 8-bit signed integer 565 /// element of the first 64-bit integer vector of [8 x i8]. Positive results 566 /// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 567 /// are saturated to 0x80. The results are packed into a 64-bit integer 568 /// vector of [8 x i8]. 569 /// 570 /// \headerfile <x86intrin.h> 571 /// 572 /// This intrinsic corresponds to the <c> PSUBSB </c> instruction. 573 /// 574 /// \param __m1 575 /// A 64-bit integer vector of [8 x i8] containing the minuends. 576 /// \param __m2 577 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 578 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 579 /// differences of both parameters. 
580 static __inline__ __m64 __DEFAULT_FN_ATTRS 581 _mm_subs_pi8(__m64 __m1, __m64 __m2) 582 { 583 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 584 } 585 586 /// \brief Subtracts each 16-bit signed integer element of the second 64-bit 587 /// integer vector of [4 x i16] from the corresponding 16-bit signed integer 588 /// element of the first 64-bit integer vector of [4 x i16]. Positive results 589 /// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than 590 /// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit 591 /// integer vector of [4 x i16]. 592 /// 593 /// \headerfile <x86intrin.h> 594 /// 595 /// This intrinsic corresponds to the <c> PSUBSW </c> instruction. 596 /// 597 /// \param __m1 598 /// A 64-bit integer vector of [4 x i16] containing the minuends. 599 /// \param __m2 600 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 601 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 602 /// differences of both parameters. 603 static __inline__ __m64 __DEFAULT_FN_ATTRS 604 _mm_subs_pi16(__m64 __m1, __m64 __m2) 605 { 606 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 607 } 608 609 /// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit 610 /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer 611 /// element of the first 64-bit integer vector of [8 x i8]. 612 /// 613 /// If an element of the first vector is less than the corresponding element 614 /// of the second vector, the result is saturated to 0. The results are 615 /// packed into a 64-bit integer vector of [8 x i8]. 616 /// 617 /// \headerfile <x86intrin.h> 618 /// 619 /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction. 620 /// 621 /// \param __m1 622 /// A 64-bit integer vector of [8 x i8] containing the minuends. 623 /// \param __m2 624 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 
625 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 626 /// differences of both parameters. 627 static __inline__ __m64 __DEFAULT_FN_ATTRS 628 _mm_subs_pu8(__m64 __m1, __m64 __m2) 629 { 630 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 631 } 632 633 /// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit 634 /// integer vector of [4 x i16] from the corresponding 16-bit unsigned 635 /// integer element of the first 64-bit integer vector of [4 x i16]. 636 /// 637 /// If an element of the first vector is less than the corresponding element 638 /// of the second vector, the result is saturated to 0. The results are 639 /// packed into a 64-bit integer vector of [4 x i16]. 640 /// 641 /// \headerfile <x86intrin.h> 642 /// 643 /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction. 644 /// 645 /// \param __m1 646 /// A 64-bit integer vector of [4 x i16] containing the minuends. 647 /// \param __m2 648 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 649 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 650 /// differences of both parameters. 651 static __inline__ __m64 __DEFAULT_FN_ATTRS 652 _mm_subs_pu16(__m64 __m1, __m64 __m2) 653 { 654 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 655 } 656 657 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit 658 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 659 /// element of the second 64-bit integer vector of [4 x i16] and get four 660 /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. 661 /// The lower 32 bits of these two sums are packed into a 64-bit integer 662 /// vector of [2 x i32]. 663 /// 664 /// For example, bits [15:0] of both parameters are multiplied, bits [31:16] 665 /// of both parameters are multiplied, and the sum of both results is written 666 /// to bits [31:0] of the result. 
667 /// 668 /// \headerfile <x86intrin.h> 669 /// 670 /// This intrinsic corresponds to the <c> PMADDWD </c> instruction. 671 /// 672 /// \param __m1 673 /// A 64-bit integer vector of [4 x i16]. 674 /// \param __m2 675 /// A 64-bit integer vector of [4 x i16]. 676 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of 677 /// products of both parameters. 678 static __inline__ __m64 __DEFAULT_FN_ATTRS 679 _mm_madd_pi16(__m64 __m1, __m64 __m2) 680 { 681 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 682 } 683 684 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit 685 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 686 /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper 687 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 688 /// 689 /// \headerfile <x86intrin.h> 690 /// 691 /// This intrinsic corresponds to the <c> PMULHW </c> instruction. 692 /// 693 /// \param __m1 694 /// A 64-bit integer vector of [4 x i16]. 695 /// \param __m2 696 /// A 64-bit integer vector of [4 x i16]. 697 /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits 698 /// of the products of both parameters. 699 static __inline__ __m64 __DEFAULT_FN_ATTRS 700 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) 701 { 702 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 703 } 704 705 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit 706 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 707 /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower 708 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 709 /// 710 /// \headerfile <x86intrin.h> 711 /// 712 /// This intrinsic corresponds to the <c> PMULLW </c> instruction. 713 /// 714 /// \param __m1 715 /// A 64-bit integer vector of [4 x i16]. 
716 /// \param __m2 717 /// A 64-bit integer vector of [4 x i16]. 718 /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits 719 /// of the products of both parameters. 720 static __inline__ __m64 __DEFAULT_FN_ATTRS 721 _mm_mullo_pi16(__m64 __m1, __m64 __m2) 722 { 723 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 724 } 725 726 /// \brief Left-shifts each 16-bit signed integer element of the first 727 /// parameter, which is a 64-bit integer vector of [4 x i16], by the number 728 /// of bits specified by the second parameter, which is a 64-bit integer. The 729 /// lower 16 bits of the results are packed into a 64-bit integer vector of 730 /// [4 x i16]. 731 /// 732 /// \headerfile <x86intrin.h> 733 /// 734 /// This intrinsic corresponds to the <c> PSLLW </c> instruction. 735 /// 736 /// \param __m 737 /// A 64-bit integer vector of [4 x i16]. 738 /// \param __count 739 /// A 64-bit integer vector interpreted as a single 64-bit integer. 740 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 741 /// values. If \a __count is greater or equal to 16, the result is set to all 742 /// 0. 743 static __inline__ __m64 __DEFAULT_FN_ATTRS 744 _mm_sll_pi16(__m64 __m, __m64 __count) 745 { 746 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 747 } 748 749 /// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer 750 /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. 751 /// The lower 16 bits of the results are packed into a 64-bit integer vector 752 /// of [4 x i16]. 753 /// 754 /// \headerfile <x86intrin.h> 755 /// 756 /// This intrinsic corresponds to the <c> PSLLW </c> instruction. 757 /// 758 /// \param __m 759 /// A 64-bit integer vector of [4 x i16]. 760 /// \param __count 761 /// A 32-bit integer value. 762 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 763 /// values. 
If \a __count is greater or equal to 16, the result is set to all 764 /// 0. 765 static __inline__ __m64 __DEFAULT_FN_ATTRS 766 _mm_slli_pi16(__m64 __m, int __count) 767 { 768 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 769 } 770 771 /// \brief Left-shifts each 32-bit signed integer element of the first 772 /// parameter, which is a 64-bit integer vector of [2 x i32], by the number 773 /// of bits specified by the second parameter, which is a 64-bit integer. The 774 /// lower 32 bits of the results are packed into a 64-bit integer vector of 775 /// [2 x i32]. 776 /// 777 /// \headerfile <x86intrin.h> 778 /// 779 /// This intrinsic corresponds to the <c> PSLLD </c> instruction. 780 /// 781 /// \param __m 782 /// A 64-bit integer vector of [2 x i32]. 783 /// \param __count 784 /// A 64-bit integer vector interpreted as a single 64-bit integer. 785 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 786 /// values. If \a __count is greater or equal to 32, the result is set to all 787 /// 0. 788 static __inline__ __m64 __DEFAULT_FN_ATTRS 789 _mm_sll_pi32(__m64 __m, __m64 __count) 790 { 791 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 792 } 793 794 /// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer 795 /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. 796 /// The lower 32 bits of the results are packed into a 64-bit integer vector 797 /// of [2 x i32]. 798 /// 799 /// \headerfile <x86intrin.h> 800 /// 801 /// This intrinsic corresponds to the <c> PSLLD </c> instruction. 802 /// 803 /// \param __m 804 /// A 64-bit integer vector of [2 x i32]. 805 /// \param __count 806 /// A 32-bit integer value. 807 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 808 /// values. If \a __count is greater or equal to 32, the result is set to all 809 /// 0. 
810 static __inline__ __m64 __DEFAULT_FN_ATTRS 811 _mm_slli_pi32(__m64 __m, int __count) 812 { 813 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 814 } 815 816 /// \brief Left-shifts the first 64-bit integer parameter by the number of bits 817 /// specified by the second 64-bit integer parameter. The lower 64 bits of 818 /// result are returned. 819 /// 820 /// \headerfile <x86intrin.h> 821 /// 822 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 823 /// 824 /// \param __m 825 /// A 64-bit integer vector interpreted as a single 64-bit integer. 826 /// \param __count 827 /// A 64-bit integer vector interpreted as a single 64-bit integer. 828 /// \returns A 64-bit integer vector containing the left-shifted value. If 829 /// \a __count is greater or equal to 64, the result is set to 0. 830 static __inline__ __m64 __DEFAULT_FN_ATTRS 831 _mm_sll_si64(__m64 __m, __m64 __count) 832 { 833 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); 834 } 835 836 /// \brief Left-shifts the first parameter, which is a 64-bit integer, by the 837 /// number of bits specified by the second parameter, which is a 32-bit 838 /// integer. The lower 64 bits of result are returned. 839 /// 840 /// \headerfile <x86intrin.h> 841 /// 842 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 843 /// 844 /// \param __m 845 /// A 64-bit integer vector interpreted as a single 64-bit integer. 846 /// \param __count 847 /// A 32-bit integer value. 848 /// \returns A 64-bit integer vector containing the left-shifted value. If 849 /// \a __count is greater or equal to 64, the result is set to 0. 
850 static __inline__ __m64 __DEFAULT_FN_ATTRS 851 _mm_slli_si64(__m64 __m, int __count) 852 { 853 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); 854 } 855 856 /// \brief Right-shifts each 16-bit integer element of the first parameter, 857 /// which is a 64-bit integer vector of [4 x i16], by the number of bits 858 /// specified by the second parameter, which is a 64-bit integer. 859 /// 860 /// High-order bits are filled with the sign bit of the initial value of each 861 /// 16-bit element. The 16-bit results are packed into a 64-bit integer 862 /// vector of [4 x i16]. 863 /// 864 /// \headerfile <x86intrin.h> 865 /// 866 /// This intrinsic corresponds to the <c> PSRAW </c> instruction. 867 /// 868 /// \param __m 869 /// A 64-bit integer vector of [4 x i16]. 870 /// \param __count 871 /// A 64-bit integer vector interpreted as a single 64-bit integer. 872 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 873 /// values. 874 static __inline__ __m64 __DEFAULT_FN_ATTRS 875 _mm_sra_pi16(__m64 __m, __m64 __count) 876 { 877 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 878 } 879 880 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector 881 /// of [4 x i16] by the number of bits specified by a 32-bit integer. 882 /// 883 /// High-order bits are filled with the sign bit of the initial value of each 884 /// 16-bit element. The 16-bit results are packed into a 64-bit integer 885 /// vector of [4 x i16]. 886 /// 887 /// \headerfile <x86intrin.h> 888 /// 889 /// This intrinsic corresponds to the <c> PSRAW </c> instruction. 890 /// 891 /// \param __m 892 /// A 64-bit integer vector of [4 x i16]. 893 /// \param __count 894 /// A 32-bit integer value. 895 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 896 /// values. 
897 static __inline__ __m64 __DEFAULT_FN_ATTRS 898 _mm_srai_pi16(__m64 __m, int __count) 899 { 900 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 901 } 902 903 /// \brief Right-shifts each 32-bit integer element of the first parameter, 904 /// which is a 64-bit integer vector of [2 x i32], by the number of bits 905 /// specified by the second parameter, which is a 64-bit integer. 906 /// 907 /// High-order bits are filled with the sign bit of the initial value of each 908 /// 32-bit element. The 32-bit results are packed into a 64-bit integer 909 /// vector of [2 x i32]. 910 /// 911 /// \headerfile <x86intrin.h> 912 /// 913 /// This intrinsic corresponds to the <c> PSRAD </c> instruction. 914 /// 915 /// \param __m 916 /// A 64-bit integer vector of [2 x i32]. 917 /// \param __count 918 /// A 64-bit integer vector interpreted as a single 64-bit integer. 919 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 920 /// values. 921 static __inline__ __m64 __DEFAULT_FN_ATTRS 922 _mm_sra_pi32(__m64 __m, __m64 __count) 923 { 924 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 925 } 926 927 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector 928 /// of [2 x i32] by the number of bits specified by a 32-bit integer. 929 /// 930 /// High-order bits are filled with the sign bit of the initial value of each 931 /// 32-bit element. The 32-bit results are packed into a 64-bit integer 932 /// vector of [2 x i32]. 933 /// 934 /// \headerfile <x86intrin.h> 935 /// 936 /// This intrinsic corresponds to the <c> PSRAD </c> instruction. 937 /// 938 /// \param __m 939 /// A 64-bit integer vector of [2 x i32]. 940 /// \param __count 941 /// A 32-bit integer value. 942 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 943 /// values. 
944 static __inline__ __m64 __DEFAULT_FN_ATTRS 945 _mm_srai_pi32(__m64 __m, int __count) 946 { 947 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 948 } 949 950 /// \brief Right-shifts each 16-bit integer element of the first parameter, 951 /// which is a 64-bit integer vector of [4 x i16], by the number of bits 952 /// specified by the second parameter, which is a 64-bit integer. 953 /// 954 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 955 /// integer vector of [4 x i16]. 956 /// 957 /// \headerfile <x86intrin.h> 958 /// 959 /// This intrinsic corresponds to the <c> PSRLW </c> instruction. 960 /// 961 /// \param __m 962 /// A 64-bit integer vector of [4 x i16]. 963 /// \param __count 964 /// A 64-bit integer vector interpreted as a single 64-bit integer. 965 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 966 /// values. 967 static __inline__ __m64 __DEFAULT_FN_ATTRS 968 _mm_srl_pi16(__m64 __m, __m64 __count) 969 { 970 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 971 } 972 973 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector 974 /// of [4 x i16] by the number of bits specified by a 32-bit integer. 975 /// 976 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 977 /// integer vector of [4 x i16]. 978 /// 979 /// \headerfile <x86intrin.h> 980 /// 981 /// This intrinsic corresponds to the <c> PSRLW </c> instruction. 982 /// 983 /// \param __m 984 /// A 64-bit integer vector of [4 x i16]. 985 /// \param __count 986 /// A 32-bit integer value. 987 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 988 /// values. 
989 static __inline__ __m64 __DEFAULT_FN_ATTRS 990 _mm_srli_pi16(__m64 __m, int __count) 991 { 992 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 993 } 994 995 /// \brief Right-shifts each 32-bit integer element of the first parameter, 996 /// which is a 64-bit integer vector of [2 x i32], by the number of bits 997 /// specified by the second parameter, which is a 64-bit integer. 998 /// 999 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1000 /// integer vector of [2 x i32]. 1001 /// 1002 /// \headerfile <x86intrin.h> 1003 /// 1004 /// This intrinsic corresponds to the <c> PSRLD </c> instruction. 1005 /// 1006 /// \param __m 1007 /// A 64-bit integer vector of [2 x i32]. 1008 /// \param __count 1009 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1010 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1011 /// values. 1012 static __inline__ __m64 __DEFAULT_FN_ATTRS 1013 _mm_srl_pi32(__m64 __m, __m64 __count) 1014 { 1015 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 1016 } 1017 1018 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector 1019 /// of [2 x i32] by the number of bits specified by a 32-bit integer. 1020 /// 1021 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1022 /// integer vector of [2 x i32]. 1023 /// 1024 /// \headerfile <x86intrin.h> 1025 /// 1026 /// This intrinsic corresponds to the <c> PSRLD </c> instruction. 1027 /// 1028 /// \param __m 1029 /// A 64-bit integer vector of [2 x i32]. 1030 /// \param __count 1031 /// A 32-bit integer value. 1032 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1033 /// values. 
1034 static __inline__ __m64 __DEFAULT_FN_ATTRS 1035 _mm_srli_pi32(__m64 __m, int __count) 1036 { 1037 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 1038 } 1039 1040 /// \brief Right-shifts the first 64-bit integer parameter by the number of bits 1041 /// specified by the second 64-bit integer parameter. 1042 /// 1043 /// High-order bits are cleared. 1044 /// 1045 /// \headerfile <x86intrin.h> 1046 /// 1047 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1048 /// 1049 /// \param __m 1050 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1051 /// \param __count 1052 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1053 /// \returns A 64-bit integer vector containing the right-shifted value. 1054 static __inline__ __m64 __DEFAULT_FN_ATTRS 1055 _mm_srl_si64(__m64 __m, __m64 __count) 1056 { 1057 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); 1058 } 1059 1060 /// \brief Right-shifts the first parameter, which is a 64-bit integer, by the 1061 /// number of bits specified by the second parameter, which is a 32-bit 1062 /// integer. 1063 /// 1064 /// High-order bits are cleared. 1065 /// 1066 /// \headerfile <x86intrin.h> 1067 /// 1068 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1069 /// 1070 /// \param __m 1071 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1072 /// \param __count 1073 /// A 32-bit integer value. 1074 /// \returns A 64-bit integer vector containing the right-shifted value. 1075 static __inline__ __m64 __DEFAULT_FN_ATTRS 1076 _mm_srli_si64(__m64 __m, int __count) 1077 { 1078 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); 1079 } 1080 1081 /// \brief Performs a bitwise AND of two 64-bit integer vectors. 1082 /// 1083 /// \headerfile <x86intrin.h> 1084 /// 1085 /// This intrinsic corresponds to the <c> PAND </c> instruction. 1086 /// 1087 /// \param __m1 1088 /// A 64-bit integer vector. 
1089 /// \param __m2 1090 /// A 64-bit integer vector. 1091 /// \returns A 64-bit integer vector containing the bitwise AND of both 1092 /// parameters. 1093 static __inline__ __m64 __DEFAULT_FN_ATTRS 1094 _mm_and_si64(__m64 __m1, __m64 __m2) 1095 { 1096 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); 1097 } 1098 1099 /// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then 1100 /// performs a bitwise AND of the intermediate result and the second 64-bit 1101 /// integer vector. 1102 /// 1103 /// \headerfile <x86intrin.h> 1104 /// 1105 /// This intrinsic corresponds to the <c> PANDN </c> instruction. 1106 /// 1107 /// \param __m1 1108 /// A 64-bit integer vector. The one's complement of this parameter is used 1109 /// in the bitwise AND. 1110 /// \param __m2 1111 /// A 64-bit integer vector. 1112 /// \returns A 64-bit integer vector containing the bitwise AND of the second 1113 /// parameter and the one's complement of the first parameter. 1114 static __inline__ __m64 __DEFAULT_FN_ATTRS 1115 _mm_andnot_si64(__m64 __m1, __m64 __m2) 1116 { 1117 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); 1118 } 1119 1120 /// \brief Performs a bitwise OR of two 64-bit integer vectors. 1121 /// 1122 /// \headerfile <x86intrin.h> 1123 /// 1124 /// This intrinsic corresponds to the <c> POR </c> instruction. 1125 /// 1126 /// \param __m1 1127 /// A 64-bit integer vector. 1128 /// \param __m2 1129 /// A 64-bit integer vector. 1130 /// \returns A 64-bit integer vector containing the bitwise OR of both 1131 /// parameters. 1132 static __inline__ __m64 __DEFAULT_FN_ATTRS 1133 _mm_or_si64(__m64 __m1, __m64 __m2) 1134 { 1135 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); 1136 } 1137 1138 /// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors. 1139 /// 1140 /// \headerfile <x86intrin.h> 1141 /// 1142 /// This intrinsic corresponds to the <c> PXOR </c> instruction. 1143 /// 1144 /// \param __m1 1145 /// A 64-bit integer vector. 
1146 /// \param __m2 1147 /// A 64-bit integer vector. 1148 /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both 1149 /// parameters. 1150 static __inline__ __m64 __DEFAULT_FN_ATTRS 1151 _mm_xor_si64(__m64 __m1, __m64 __m2) 1152 { 1153 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); 1154 } 1155 1156 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of 1157 /// [8 x i8] to determine if the element of the first vector is equal to the 1158 /// corresponding element of the second vector. 1159 /// 1160 /// The comparison yields 0 for false, 0xFF for true. 1161 /// 1162 /// \headerfile <x86intrin.h> 1163 /// 1164 /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction. 1165 /// 1166 /// \param __m1 1167 /// A 64-bit integer vector of [8 x i8]. 1168 /// \param __m2 1169 /// A 64-bit integer vector of [8 x i8]. 1170 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1171 /// results. 1172 static __inline__ __m64 __DEFAULT_FN_ATTRS 1173 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 1174 { 1175 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 1176 } 1177 1178 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of 1179 /// [4 x i16] to determine if the element of the first vector is equal to the 1180 /// corresponding element of the second vector. 1181 /// 1182 /// The comparison yields 0 for false, 0xFFFF for true. 1183 /// 1184 /// \headerfile <x86intrin.h> 1185 /// 1186 /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction. 1187 /// 1188 /// \param __m1 1189 /// A 64-bit integer vector of [4 x i16]. 1190 /// \param __m2 1191 /// A 64-bit integer vector of [4 x i16]. 1192 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1193 /// results. 
1194 static __inline__ __m64 __DEFAULT_FN_ATTRS 1195 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 1196 { 1197 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 1198 } 1199 1200 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of 1201 /// [2 x i32] to determine if the element of the first vector is equal to the 1202 /// corresponding element of the second vector. 1203 /// 1204 /// The comparison yields 0 for false, 0xFFFFFFFF for true. 1205 /// 1206 /// \headerfile <x86intrin.h> 1207 /// 1208 /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction. 1209 /// 1210 /// \param __m1 1211 /// A 64-bit integer vector of [2 x i32]. 1212 /// \param __m2 1213 /// A 64-bit integer vector of [2 x i32]. 1214 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1215 /// results. 1216 static __inline__ __m64 __DEFAULT_FN_ATTRS 1217 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 1218 { 1219 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 1220 } 1221 1222 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of 1223 /// [8 x i8] to determine if the element of the first vector is greater than 1224 /// the corresponding element of the second vector. 1225 /// 1226 /// The comparison yields 0 for false, 0xFF for true. 1227 /// 1228 /// \headerfile <x86intrin.h> 1229 /// 1230 /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction. 1231 /// 1232 /// \param __m1 1233 /// A 64-bit integer vector of [8 x i8]. 1234 /// \param __m2 1235 /// A 64-bit integer vector of [8 x i8]. 1236 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1237 /// results. 
1238 static __inline__ __m64 __DEFAULT_FN_ATTRS 1239 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 1240 { 1241 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 1242 } 1243 1244 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of 1245 /// [4 x i16] to determine if the element of the first vector is greater than 1246 /// the corresponding element of the second vector. 1247 /// 1248 /// The comparison yields 0 for false, 0xFFFF for true. 1249 /// 1250 /// \headerfile <x86intrin.h> 1251 /// 1252 /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction. 1253 /// 1254 /// \param __m1 1255 /// A 64-bit integer vector of [4 x i16]. 1256 /// \param __m2 1257 /// A 64-bit integer vector of [4 x i16]. 1258 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1259 /// results. 1260 static __inline__ __m64 __DEFAULT_FN_ATTRS 1261 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 1262 { 1263 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 1264 } 1265 1266 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of 1267 /// [2 x i32] to determine if the element of the first vector is greater than 1268 /// the corresponding element of the second vector. 1269 /// 1270 /// The comparison yields 0 for false, 0xFFFFFFFF for true. 1271 /// 1272 /// \headerfile <x86intrin.h> 1273 /// 1274 /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction. 1275 /// 1276 /// \param __m1 1277 /// A 64-bit integer vector of [2 x i32]. 1278 /// \param __m2 1279 /// A 64-bit integer vector of [2 x i32]. 1280 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1281 /// results. 1282 static __inline__ __m64 __DEFAULT_FN_ATTRS 1283 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 1284 { 1285 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 1286 } 1287 1288 /// \brief Constructs a 64-bit integer vector initialized to zero. 
1289 /// 1290 /// \headerfile <x86intrin.h> 1291 /// 1292 /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 1293 /// 1294 /// \returns An initialized 64-bit integer vector with all elements set to zero. 1295 static __inline__ __m64 __DEFAULT_FN_ATTRS 1296 _mm_setzero_si64(void) 1297 { 1298 return (__m64){ 0LL }; 1299 } 1300 1301 /// \brief Constructs a 64-bit integer vector initialized with the specified 1302 /// 32-bit integer values. 1303 /// 1304 /// \headerfile <x86intrin.h> 1305 /// 1306 /// This intrinsic is a utility function and does not correspond to a specific 1307 /// instruction. 1308 /// 1309 /// \param __i1 1310 /// A 32-bit integer value used to initialize the upper 32 bits of the 1311 /// result. 1312 /// \param __i0 1313 /// A 32-bit integer value used to initialize the lower 32 bits of the 1314 /// result. 1315 /// \returns An initialized 64-bit integer vector. 1316 static __inline__ __m64 __DEFAULT_FN_ATTRS 1317 _mm_set_pi32(int __i1, int __i0) 1318 { 1319 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 1320 } 1321 1322 /// \brief Constructs a 64-bit integer vector initialized with the specified 1323 /// 16-bit integer values. 1324 /// 1325 /// \headerfile <x86intrin.h> 1326 /// 1327 /// This intrinsic is a utility function and does not correspond to a specific 1328 /// instruction. 1329 /// 1330 /// \param __s3 1331 /// A 16-bit integer value used to initialize bits [63:48] of the result. 1332 /// \param __s2 1333 /// A 16-bit integer value used to initialize bits [47:32] of the result. 1334 /// \param __s1 1335 /// A 16-bit integer value used to initialize bits [31:16] of the result. 1336 /// \param __s0 1337 /// A 16-bit integer value used to initialize bits [15:0] of the result. 1338 /// \returns An initialized 64-bit integer vector. 
1339 static __inline__ __m64 __DEFAULT_FN_ATTRS 1340 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 1341 { 1342 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 1343 } 1344 1345 /// \brief Constructs a 64-bit integer vector initialized with the specified 1346 /// 8-bit integer values. 1347 /// 1348 /// \headerfile <x86intrin.h> 1349 /// 1350 /// This intrinsic is a utility function and does not correspond to a specific 1351 /// instruction. 1352 /// 1353 /// \param __b7 1354 /// An 8-bit integer value used to initialize bits [63:56] of the result. 1355 /// \param __b6 1356 /// An 8-bit integer value used to initialize bits [55:48] of the result. 1357 /// \param __b5 1358 /// An 8-bit integer value used to initialize bits [47:40] of the result. 1359 /// \param __b4 1360 /// An 8-bit integer value used to initialize bits [39:32] of the result. 1361 /// \param __b3 1362 /// An 8-bit integer value used to initialize bits [31:24] of the result. 1363 /// \param __b2 1364 /// An 8-bit integer value used to initialize bits [23:16] of the result. 1365 /// \param __b1 1366 /// An 8-bit integer value used to initialize bits [15:8] of the result. 1367 /// \param __b0 1368 /// An 8-bit integer value used to initialize bits [7:0] of the result. 1369 /// \returns An initialized 64-bit integer vector. 1370 static __inline__ __m64 __DEFAULT_FN_ATTRS 1371 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 1372 char __b1, char __b0) 1373 { 1374 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 1375 __b4, __b5, __b6, __b7); 1376 } 1377 1378 /// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the 1379 /// 32-bit integer vector elements set to the specified 32-bit integer 1380 /// value. 1381 /// 1382 /// \headerfile <x86intrin.h> 1383 /// 1384 /// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction. 
1385 /// 1386 /// \param __i 1387 /// A 32-bit integer value used to initialize each vector element of the 1388 /// result. 1389 /// \returns An initialized 64-bit integer vector of [2 x i32]. 1390 static __inline__ __m64 __DEFAULT_FN_ATTRS 1391 _mm_set1_pi32(int __i) 1392 { 1393 return _mm_set_pi32(__i, __i); 1394 } 1395 1396 /// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the 1397 /// 16-bit integer vector elements set to the specified 16-bit integer 1398 /// value. 1399 /// 1400 /// \headerfile <x86intrin.h> 1401 /// 1402 /// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction. 1403 /// 1404 /// \param __w 1405 /// A 16-bit integer value used to initialize each vector element of the 1406 /// result. 1407 /// \returns An initialized 64-bit integer vector of [4 x i16]. 1408 static __inline__ __m64 __DEFAULT_FN_ATTRS 1409 _mm_set1_pi16(short __w) 1410 { 1411 return _mm_set_pi16(__w, __w, __w, __w); 1412 } 1413 1414 /// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the 1415 /// 8-bit integer vector elements set to the specified 8-bit integer value. 1416 /// 1417 /// \headerfile <x86intrin.h> 1418 /// 1419 /// This intrinsic corresponds to the <c> VPUNPCKLBW + VPSHUFLW / PUNPCKLBW + 1420 /// PSHUFLW </c> instruction. 1421 /// 1422 /// \param __b 1423 /// An 8-bit integer value used to initialize each vector element of the 1424 /// result. 1425 /// \returns An initialized 64-bit integer vector of [8 x i8]. 1426 static __inline__ __m64 __DEFAULT_FN_ATTRS 1427 _mm_set1_pi8(char __b) 1428 { 1429 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 1430 } 1431 1432 /// \brief Constructs a 64-bit integer vector, initialized in reverse order with 1433 /// the specified 32-bit integer values. 1434 /// 1435 /// \headerfile <x86intrin.h> 1436 /// 1437 /// This intrinsic is a utility function and does not correspond to a specific 1438 /// instruction. 
1439 /// 1440 /// \param __i0 1441 /// A 32-bit integer value used to initialize the lower 32 bits of the 1442 /// result. 1443 /// \param __i1 1444 /// A 32-bit integer value used to initialize the upper 32 bits of the 1445 /// result. 1446 /// \returns An initialized 64-bit integer vector. 1447 static __inline__ __m64 __DEFAULT_FN_ATTRS 1448 _mm_setr_pi32(int __i0, int __i1) 1449 { 1450 return _mm_set_pi32(__i1, __i0); 1451 } 1452 1453 /// \brief Constructs a 64-bit integer vector, initialized in reverse order with 1454 /// the specified 16-bit integer values. 1455 /// 1456 /// \headerfile <x86intrin.h> 1457 /// 1458 /// This intrinsic is a utility function and does not correspond to a specific 1459 /// instruction. 1460 /// 1461 /// \param __w0 1462 /// A 16-bit integer value used to initialize bits [15:0] of the result. 1463 /// \param __w1 1464 /// A 16-bit integer value used to initialize bits [31:16] of the result. 1465 /// \param __w2 1466 /// A 16-bit integer value used to initialize bits [47:32] of the result. 1467 /// \param __w3 1468 /// A 16-bit integer value used to initialize bits [63:48] of the result. 1469 /// \returns An initialized 64-bit integer vector. 1470 static __inline__ __m64 __DEFAULT_FN_ATTRS 1471 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 1472 { 1473 return _mm_set_pi16(__w3, __w2, __w1, __w0); 1474 } 1475 1476 /// \brief Constructs a 64-bit integer vector, initialized in reverse order with 1477 /// the specified 8-bit integer values. 1478 /// 1479 /// \headerfile <x86intrin.h> 1480 /// 1481 /// This intrinsic is a utility function and does not correspond to a specific 1482 /// instruction. 1483 /// 1484 /// \param __b0 1485 /// An 8-bit integer value used to initialize bits [7:0] of the result. 1486 /// \param __b1 1487 /// An 8-bit integer value used to initialize bits [15:8] of the result. 1488 /// \param __b2 1489 /// An 8-bit integer value used to initialize bits [23:16] of the result. 
1490 /// \param __b3 1491 /// An 8-bit integer value used to initialize bits [31:24] of the result. 1492 /// \param __b4 1493 /// An 8-bit integer value used to initialize bits [39:32] of the result. 1494 /// \param __b5 1495 /// An 8-bit integer value used to initialize bits [47:40] of the result. 1496 /// \param __b6 1497 /// An 8-bit integer value used to initialize bits [55:48] of the result. 1498 /// \param __b7 1499 /// An 8-bit integer value used to initialize bits [63:56] of the result. 1500 /// \returns An initialized 64-bit integer vector. 1501 static __inline__ __m64 __DEFAULT_FN_ATTRS 1502 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 1503 char __b6, char __b7) 1504 { 1505 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 1506 } 1507 1508 #undef __DEFAULT_FN_ATTRS 1509 1510 /* Aliases for compatibility. */ 1511 #define _m_empty _mm_empty 1512 #define _m_from_int _mm_cvtsi32_si64 1513 #define _m_from_int64 _mm_cvtsi64_m64 1514 #define _m_to_int _mm_cvtsi64_si32 1515 #define _m_to_int64 _mm_cvtm64_si64 1516 #define _m_packsswb _mm_packs_pi16 1517 #define _m_packssdw _mm_packs_pi32 1518 #define _m_packuswb _mm_packs_pu16 1519 #define _m_punpckhbw _mm_unpackhi_pi8 1520 #define _m_punpckhwd _mm_unpackhi_pi16 1521 #define _m_punpckhdq _mm_unpackhi_pi32 1522 #define _m_punpcklbw _mm_unpacklo_pi8 1523 #define _m_punpcklwd _mm_unpacklo_pi16 1524 #define _m_punpckldq _mm_unpacklo_pi32 1525 #define _m_paddb _mm_add_pi8 1526 #define _m_paddw _mm_add_pi16 1527 #define _m_paddd _mm_add_pi32 1528 #define _m_paddsb _mm_adds_pi8 1529 #define _m_paddsw _mm_adds_pi16 1530 #define _m_paddusb _mm_adds_pu8 1531 #define _m_paddusw _mm_adds_pu16 1532 #define _m_psubb _mm_sub_pi8 1533 #define _m_psubw _mm_sub_pi16 1534 #define _m_psubd _mm_sub_pi32 1535 #define _m_psubsb _mm_subs_pi8 1536 #define _m_psubsw _mm_subs_pi16 1537 #define _m_psubusb _mm_subs_pu8 1538 #define _m_psubusw _mm_subs_pu16 1539 #define _m_pmaddwd 
_mm_madd_pi16 1540 #define _m_pmulhw _mm_mulhi_pi16 1541 #define _m_pmullw _mm_mullo_pi16 1542 #define _m_psllw _mm_sll_pi16 1543 #define _m_psllwi _mm_slli_pi16 1544 #define _m_pslld _mm_sll_pi32 1545 #define _m_pslldi _mm_slli_pi32 1546 #define _m_psllq _mm_sll_si64 1547 #define _m_psllqi _mm_slli_si64 1548 #define _m_psraw _mm_sra_pi16 1549 #define _m_psrawi _mm_srai_pi16 1550 #define _m_psrad _mm_sra_pi32 1551 #define _m_psradi _mm_srai_pi32 1552 #define _m_psrlw _mm_srl_pi16 1553 #define _m_psrlwi _mm_srli_pi16 1554 #define _m_psrld _mm_srl_pi32 1555 #define _m_psrldi _mm_srli_pi32 1556 #define _m_psrlq _mm_srl_si64 1557 #define _m_psrlqi _mm_srli_si64 1558 #define _m_pand _mm_and_si64 1559 #define _m_pandn _mm_andnot_si64 1560 #define _m_por _mm_or_si64 1561 #define _m_pxor _mm_xor_si64 1562 #define _m_pcmpeqb _mm_cmpeq_pi8 1563 #define _m_pcmpeqw _mm_cmpeq_pi16 1564 #define _m_pcmpeqd _mm_cmpeq_pi32 1565 #define _m_pcmpgtb _mm_cmpgt_pi8 1566 #define _m_pcmpgtw _mm_cmpgt_pi16 1567 #define _m_pcmpgtd _mm_cmpgt_pi32 1568 1569 #endif /* __MMINTRIN_H */ 1570 1571