1 /*===---- bmiintrin.h - BMI intrinsics -------------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H 25 #error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." 26 #endif 27 28 #ifndef __BMIINTRIN_H 29 #define __BMIINTRIN_H 30 31 /// \brief Counts the number of trailing zero bits in the operand. 32 /// 33 /// \headerfile <x86intrin.h> 34 /// 35 /// \code 36 /// unsigned short _tzcnt_u16(unsigned short a); 37 /// \endcode 38 /// 39 /// This intrinsic corresponds to the \c TZCNT instruction. 40 /// 41 /// \param a 42 /// An unsigned 16-bit integer whose trailing zeros are to be counted. 43 /// \returns An unsigned 16-bit integer containing the number of trailing zero 44 /// bits in the operand. 45 #define _tzcnt_u16(a) (__tzcnt_u16((a))) 46 47 /// \brief Performs a bitwise AND of the second operand with the one's 48 /// complement of the first operand. 49 /// 50 /// \headerfile <x86intrin.h> 51 /// 52 /// \code 53 /// unsigned int _andn_u32(unsigned int a, unsigned int b); 54 /// \endcode 55 /// 56 /// This intrinsic corresponds to the \c ANDN instruction. 57 /// 58 /// \param a 59 /// An unsigned integer containing one of the operands. 60 /// \param b 61 /// An unsigned integer containing one of the operands. 62 /// \returns An unsigned integer containing the bitwise AND of the second 63 /// operand with the one's complement of the first operand. 64 #define _andn_u32(a, b) (__andn_u32((a), (b))) 65 66 /* _bextr_u32 != __bextr_u32 */ 67 /// \brief Clears all bits in the source except for the least significant bit 68 /// containing a value of 1 and returns the result. 69 /// 70 /// \headerfile <x86intrin.h> 71 /// 72 /// \code 73 /// unsigned int _blsi_u32(unsigned int a); 74 /// \endcode 75 /// 76 /// This intrinsic corresponds to the \c BLSI instruction. 77 /// 78 /// \param a 79 /// An unsigned integer whose bits are to be cleared. 80 /// \returns An unsigned integer containing the result of clearing the bits from 81 /// the source operand. 82 #define _blsi_u32(a) (__blsi_u32((a))) 83 84 /// \brief Creates a mask whose bits are set to 1, using bit 0 up to and 85 /// including the least siginificant bit that is set to 1 in the source 86 /// operand and returns the result. 87 /// 88 /// \headerfile <x86intrin.h> 89 /// 90 /// \code 91 /// unsigned int _blsmsk_u32(unsigned int a); 92 /// \endcode 93 /// 94 /// This intrinsic corresponds to the \c BLSMSK instruction. 95 /// 96 /// \param a 97 /// An unsigned integer used to create the mask. 98 /// \returns An unsigned integer containing the newly created mask. 99 #define _blsmsk_u32(a) (__blsmsk_u32((a))) 100 101 /// \brief Clears the least siginificant bit that is set to 1 in the source 102 /// operand and returns the result. 103 /// 104 /// \headerfile <x86intrin.h> 105 /// 106 /// \code 107 /// unsigned int _blsr_u32(unsigned int a); 108 /// \endcode 109 /// 110 /// This intrinsic corresponds to the \c BLSR instruction. 111 /// 112 /// \param a 113 /// An unsigned integer containing the operand to be cleared. 114 /// \returns An unsigned integer containing the result of clearing the source 115 /// operand. 116 #define _blsr_u32(a) (__blsr_u32((a))) 117 118 /// \brief Counts the number of trailing zero bits in the operand. 119 /// 120 /// \headerfile <x86intrin.h> 121 /// 122 /// \code 123 /// unsigned int _tzcnt_u32(unsigned int a); 124 /// \endcode 125 /// 126 /// This intrinsic corresponds to the \c TZCNT instruction. 127 /// 128 /// \param a 129 /// An unsigned 32-bit integer whose trailing zeros are to be counted. 130 /// \returns An unsigned 32-bit integer containing the number of trailing zero 131 /// bits in the operand. 132 #define _tzcnt_u32(a) (__tzcnt_u32((a))) 133 134 /* Define the default attributes for the functions in this file. */ 135 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) 136 137 /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT 138 instruction behaves as BSF on non-BMI targets, there is code that expects 139 to use it as a potentially faster version of BSF. */ 140 #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) 141 142 /// \brief Counts the number of trailing zero bits in the operand. 143 /// 144 /// \headerfile <x86intrin.h> 145 /// 146 /// This intrinsic corresponds to the \c TZCNT instruction. 147 /// 148 /// \param __X 149 /// An unsigned 16-bit integer whose trailing zeros are to be counted. 150 /// \returns An unsigned 16-bit integer containing the number of trailing zero 151 /// bits in the operand. 152 static __inline__ unsigned short __RELAXED_FN_ATTRS 153 __tzcnt_u16(unsigned short __X) 154 { 155 return __X ? __builtin_ctzs(__X) : 16; 156 } 157 158 /// \brief Performs a bitwise AND of the second operand with the one's 159 /// complement of the first operand. 160 /// 161 /// \headerfile <x86intrin.h> 162 /// 163 /// This intrinsic corresponds to the \c ANDN instruction. 164 /// 165 /// \param __X 166 /// An unsigned integer containing one of the operands. 167 /// \param __Y 168 /// An unsigned integer containing one of the operands. 169 /// \returns An unsigned integer containing the bitwise AND of the second 170 /// operand with the one's complement of the first operand. 171 static __inline__ unsigned int __DEFAULT_FN_ATTRS 172 __andn_u32(unsigned int __X, unsigned int __Y) 173 { 174 return ~__X & __Y; 175 } 176 177 /* AMD-specified, double-leading-underscore version of BEXTR */ 178 /// \brief Extracts the specified bits from the first operand and returns them 179 /// in the least significant bits of the result. 180 /// 181 /// \headerfile <x86intrin.h> 182 /// 183 /// This intrinsic corresponds to the \c BEXTR instruction. 184 /// 185 /// \param __X 186 /// An unsigned integer whose bits are to be extracted. 187 /// \param __Y 188 /// An unsigned integer used to specify which bits are extracted. Bits [7:0] 189 /// specify the index of the least significant bit. Bits [15:8] specify the 190 /// number of bits to be extracted. 191 /// \returns An unsigned integer whose least significant bits contain the 192 /// extracted bits. 193 static __inline__ unsigned int __DEFAULT_FN_ATTRS 194 __bextr_u32(unsigned int __X, unsigned int __Y) 195 { 196 return __builtin_ia32_bextr_u32(__X, __Y); 197 } 198 199 /* Intel-specified, single-leading-underscore version of BEXTR */ 200 /// \brief Extracts the specified bits from the first operand and returns them 201 /// in the least significant bits of the result. 202 /// 203 /// \headerfile <x86intrin.h> 204 /// 205 /// This intrinsic corresponds to the \c BEXTR instruction. 206 /// 207 /// \param __X 208 /// An unsigned integer whose bits are to be extracted. 209 /// \param __Y 210 /// An unsigned integer used to specify the index of the least significant 211 /// bit for the bits to be extracted. Bits [7:0] specify the index. 212 /// \param __Z 213 /// An unsigned integer used to specify the number of bits to be extracted. 214 /// Bits [7:0] specify the number of bits. 215 /// \returns An unsigned integer whose least significant bits contain the 216 /// extracted bits. 217 static __inline__ unsigned int __DEFAULT_FN_ATTRS 218 _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) 219 { 220 return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); 221 } 222 223 /// \brief Clears all bits in the source except for the least significant bit 224 /// containing a value of 1 and returns the result. 225 /// 226 /// \headerfile <x86intrin.h> 227 /// 228 /// This intrinsic corresponds to the \c BLSI instruction. 229 /// 230 /// \param __X 231 /// An unsigned integer whose bits are to be cleared. 232 /// \returns An unsigned integer containing the result of clearing the bits from 233 /// the source operand. 234 static __inline__ unsigned int __DEFAULT_FN_ATTRS 235 __blsi_u32(unsigned int __X) 236 { 237 return __X & -__X; 238 } 239 240 /// \brief Creates a mask whose bits are set to 1, using bit 0 up to and 241 /// including the least siginificant bit that is set to 1 in the source 242 /// operand and returns the result. 243 /// 244 /// \headerfile <x86intrin.h> 245 /// 246 /// This intrinsic corresponds to the \c BLSMSK instruction. 247 /// 248 /// \param __X 249 /// An unsigned integer used to create the mask. 250 /// \returns An unsigned integer containing the newly created mask. 251 static __inline__ unsigned int __DEFAULT_FN_ATTRS 252 __blsmsk_u32(unsigned int __X) 253 { 254 return __X ^ (__X - 1); 255 } 256 257 /// \brief Clears the least siginificant bit that is set to 1 in the source 258 /// operand and returns the result. 259 /// 260 /// \headerfile <x86intrin.h> 261 /// 262 /// This intrinsic corresponds to the \c BLSR instruction. 263 /// 264 /// \param __X 265 /// An unsigned integer containing the operand to be cleared. 266 /// \returns An unsigned integer containing the result of clearing the source 267 /// operand. 268 static __inline__ unsigned int __DEFAULT_FN_ATTRS 269 __blsr_u32(unsigned int __X) 270 { 271 return __X & (__X - 1); 272 } 273 274 /// \brief Counts the number of trailing zero bits in the operand. 275 /// 276 /// \headerfile <x86intrin.h> 277 /// 278 /// This intrinsic corresponds to the \c TZCNT instruction. 279 /// 280 /// \param __X 281 /// An unsigned 32-bit integer whose trailing zeros are to be counted. 282 /// \returns An unsigned 32-bit integer containing the number of trailing zero 283 /// bits in the operand. 284 static __inline__ unsigned int __RELAXED_FN_ATTRS 285 __tzcnt_u32(unsigned int __X) 286 { 287 return __X ? __builtin_ctz(__X) : 32; 288 } 289 290 /// \brief Counts the number of trailing zero bits in the operand. 291 /// 292 /// \headerfile <x86intrin.h> 293 /// 294 /// This intrinsic corresponds to the \c TZCNT instruction. 295 /// 296 /// \param __X 297 /// An unsigned 32-bit integer whose trailing zeros are to be counted. 298 /// \returns An 32-bit integer containing the number of trailing zero 299 /// bits in the operand. 300 static __inline__ int __RELAXED_FN_ATTRS 301 _mm_tzcnt_32(unsigned int __X) 302 { 303 return __X ? __builtin_ctz(__X) : 32; 304 } 305 306 #ifdef __x86_64__ 307 308 /// \brief Performs a bitwise AND of the second operand with the one's 309 /// complement of the first operand. 310 /// 311 /// \headerfile <x86intrin.h> 312 /// 313 /// \code 314 /// unsigned long long _andn_u64 (unsigned long long a, unsigned long long b); 315 /// \endcode 316 /// 317 /// This intrinsic corresponds to the \c ANDN instruction. 318 /// 319 /// \param a 320 /// An unsigned 64-bit integer containing one of the operands. 321 /// \param b 322 /// An unsigned 64-bit integer containing one of the operands. 323 /// \returns An unsigned 64-bit integer containing the bitwise AND of the second 324 /// operand with the one's complement of the first operand. 325 #define _andn_u64(a, b) (__andn_u64((a), (b))) 326 327 /* _bextr_u64 != __bextr_u64 */ 328 /// \brief Clears all bits in the source except for the least significant bit 329 /// containing a value of 1 and returns the result. 330 /// 331 /// \headerfile <x86intrin.h> 332 /// 333 /// \code 334 /// unsigned long long _blsi_u64(unsigned long long a); 335 /// \endcode 336 /// 337 /// This intrinsic corresponds to the \c BLSI instruction. 338 /// 339 /// \param a 340 /// An unsigned 64-bit integer whose bits are to be cleared. 341 /// \returns An unsigned 64-bit integer containing the result of clearing the 342 /// bits from the source operand. 343 #define _blsi_u64(a) (__blsi_u64((a))) 344 345 /// \brief Creates a mask whose bits are set to 1, using bit 0 up to and 346 /// including the least siginificant bit that is set to 1 in the source 347 /// operand and returns the result. 348 /// 349 /// \headerfile <x86intrin.h> 350 /// 351 /// \code 352 /// unsigned long long _blsmsk_u64(unsigned long long a); 353 /// \endcode 354 /// 355 /// This intrinsic corresponds to the \c BLSMSK instruction. 356 /// 357 /// \param a 358 /// An unsigned 64-bit integer used to create the mask. 359 /// \returns A unsigned 64-bit integer containing the newly created mask. 360 #define _blsmsk_u64(a) (__blsmsk_u64((a))) 361 362 /// \brief Clears the least siginificant bit that is set to 1 in the source 363 /// operand and returns the result. 364 /// 365 /// \headerfile <x86intrin.h> 366 /// 367 /// \code 368 /// unsigned long long _blsr_u64(unsigned long long a); 369 /// \endcode 370 /// 371 /// This intrinsic corresponds to the \c BLSR instruction. 372 /// 373 /// \param a 374 /// An unsigned 64-bit integer containing the operand to be cleared. 375 /// \returns An unsigned 64-bit integer containing the result of clearing the 376 /// source operand. 377 #define _blsr_u64(a) (__blsr_u64((a))) 378 379 /// \brief Counts the number of trailing zero bits in the operand. 380 /// 381 /// \headerfile <x86intrin.h> 382 /// 383 /// \code 384 /// unsigned long long _tzcnt_u64(unsigned long long a); 385 /// \endcode 386 /// 387 /// This intrinsic corresponds to the \c TZCNT instruction. 388 /// 389 /// \param a 390 /// An unsigned 64-bit integer whose trailing zeros are to be counted. 391 /// \returns An unsigned 64-bit integer containing the number of trailing zero 392 /// bits in the operand. 393 #define _tzcnt_u64(a) (__tzcnt_u64((a))) 394 395 /// \brief Performs a bitwise AND of the second operand with the one's 396 /// complement of the first operand. 397 /// 398 /// \headerfile <x86intrin.h> 399 /// 400 /// This intrinsic corresponds to the \c ANDN instruction. 401 /// 402 /// \param __X 403 /// An unsigned 64-bit integer containing one of the operands. 404 /// \param __Y 405 /// An unsigned 64-bit integer containing one of the operands. 406 /// \returns An unsigned 64-bit integer containing the bitwise AND of the second 407 /// operand with the one's complement of the first operand. 408 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 409 __andn_u64 (unsigned long long __X, unsigned long long __Y) 410 { 411 return ~__X & __Y; 412 } 413 414 /* AMD-specified, double-leading-underscore version of BEXTR */ 415 /// \brief Extracts the specified bits from the first operand and returns them 416 /// in the least significant bits of the result. 417 /// 418 /// \headerfile <x86intrin.h> 419 /// 420 /// This intrinsic corresponds to the \c BEXTR instruction. 421 /// 422 /// \param __X 423 /// An unsigned 64-bit integer whose bits are to be extracted. 424 /// \param __Y 425 /// An unsigned 64-bit integer used to specify which bits are extracted. Bits 426 /// [7:0] specify the index of the least significant bit. Bits [15:8] specify 427 /// the number of bits to be extracted. 428 /// \returns An unsigned 64-bit integer whose least significant bits contain the 429 /// extracted bits. 430 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 431 __bextr_u64(unsigned long long __X, unsigned long long __Y) 432 { 433 return __builtin_ia32_bextr_u64(__X, __Y); 434 } 435 436 /* Intel-specified, single-leading-underscore version of BEXTR */ 437 /// \brief Extracts the specified bits from the first operand and returns them 438 /// in the least significant bits of the result. 439 /// 440 /// \headerfile <x86intrin.h> 441 /// 442 /// This intrinsic corresponds to the \c BEXTR instruction. 443 /// 444 /// \param __X 445 /// An unsigned 64-bit integer whose bits are to be extracted. 446 /// \param __Y 447 /// An unsigned integer used to specify the index of the least significant 448 /// bit for the bits to be extracted. Bits [7:0] specify the index. 449 /// \param __Z 450 /// An unsigned integer used to specify the number of bits to be extracted. 451 /// Bits [7:0] specify the number of bits. 452 /// \returns An unsigned 64-bit integer whose least significant bits contain the 453 /// extracted bits. 454 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 455 _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) 456 { 457 return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); 458 } 459 460 /// \brief Clears all bits in the source except for the least significant bit 461 /// containing a value of 1 and returns the result. 462 /// 463 /// \headerfile <x86intrin.h> 464 /// 465 /// This intrinsic corresponds to the \c BLSI instruction. 466 /// 467 /// \param __X 468 /// An unsigned 64-bit integer whose bits are to be cleared. 469 /// \returns An unsigned 64-bit integer containing the result of clearing the 470 /// bits from the source operand. 471 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 472 __blsi_u64(unsigned long long __X) 473 { 474 return __X & -__X; 475 } 476 477 /// \brief Creates a mask whose bits are set to 1, using bit 0 up to and 478 /// including the least siginificant bit that is set to 1 in the source 479 /// operand and returns the result. 480 /// 481 /// \headerfile <x86intrin.h> 482 /// 483 /// This intrinsic corresponds to the \c BLSMSK instruction. 484 /// 485 /// \param __X 486 /// An unsigned 64-bit integer used to create the mask. 487 /// \returns A unsigned 64-bit integer containing the newly created mask. 488 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 489 __blsmsk_u64(unsigned long long __X) 490 { 491 return __X ^ (__X - 1); 492 } 493 494 /// \brief Clears the least siginificant bit that is set to 1 in the source 495 /// operand and returns the result. 496 /// 497 /// \headerfile <x86intrin.h> 498 /// 499 /// This intrinsic corresponds to the \c BLSR instruction. 500 /// 501 /// \param __X 502 /// An unsigned 64-bit integer containing the operand to be cleared. 503 /// \returns An unsigned 64-bit integer containing the result of clearing the 504 /// source operand. 505 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 506 __blsr_u64(unsigned long long __X) 507 { 508 return __X & (__X - 1); 509 } 510 511 /// \brief Counts the number of trailing zero bits in the operand. 512 /// 513 /// \headerfile <x86intrin.h> 514 /// 515 /// This intrinsic corresponds to the \c TZCNT instruction. 516 /// 517 /// \param __X 518 /// An unsigned 64-bit integer whose trailing zeros are to be counted. 519 /// \returns An unsigned 64-bit integer containing the number of trailing zero 520 /// bits in the operand. 521 static __inline__ unsigned long long __RELAXED_FN_ATTRS 522 __tzcnt_u64(unsigned long long __X) 523 { 524 return __X ? __builtin_ctzll(__X) : 64; 525 } 526 527 /// \brief Counts the number of trailing zero bits in the operand. 528 /// 529 /// \headerfile <x86intrin.h> 530 /// 531 /// This intrinsic corresponds to the \c TZCNT instruction. 532 /// 533 /// \param __X 534 /// An unsigned 64-bit integer whose trailing zeros are to be counted. 535 /// \returns An 64-bit integer containing the number of trailing zero 536 /// bits in the operand. 537 static __inline__ long long __RELAXED_FN_ATTRS 538 _mm_tzcnt_64(unsigned long long __X) 539 { 540 return __X ? __builtin_ctzll(__X) : 64; 541 } 542 543 #endif /* __x86_64__ */ 544 545 #undef __DEFAULT_FN_ATTRS 546 #undef __RELAXED_FN_ATTRS 547 548 #endif /* __BMIINTRIN_H */ 549