1 /*===---- bmiintrin.h - BMI intrinsics -------------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H 25 #error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." 26 #endif 27 28 #ifndef __BMIINTRIN_H 29 #define __BMIINTRIN_H 30 31 /// \brief Counts the number of trailing zero bits in the operand. 32 /// 33 /// \headerfile <x86intrin.h> 34 /// 35 /// \code 36 /// unsigned short _tzcnt_u16(unsigned short a); 37 /// \endcode 38 /// 39 /// This intrinsic corresponds to the \c TZCNT instruction. 40 /// 41 /// \param a 42 /// An unsigned 16-bit integer whose trailing zeros are to be counted. 43 /// \returns An unsigned 16-bit integer containing the number of trailing zero 44 /// bits in the operand. 
#define _tzcnt_u16(a)     (__tzcnt_u16((a)))

/// \brief Performs a bitwise AND of the second operand with the one's
///    complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _andn_u32(unsigned int a, unsigned int b);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param a
///    An unsigned integer whose one's complement is used as the first AND
///    operand.
/// \param b
///    An unsigned integer that is ANDed with the one's complement of \a a.
/// \returns An unsigned integer containing the bitwise AND of the second
///    operand with the one's complement of the first operand.
#define _andn_u32(a, b)   (__andn_u32((a), (b)))

/* No alias macro for BEXTR: _bextr_u32 != __bextr_u32 (the two take a
   different number of arguments), so each is defined as its own function. */

/// \brief Clears all bits in the source except for the least significant bit
///    containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsi_u32(unsigned int a);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param a
///    An unsigned integer whose bits are to be cleared.
/// \returns An unsigned integer containing the result of clearing the bits from
///    the source operand.
#define _blsi_u32(a)      (__blsi_u32((a)))

/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
///    including the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsmsk_u32(unsigned int a);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param a
///    An unsigned integer used to create the mask.
/// \returns An unsigned integer containing the newly created mask.
99 #define _blsmsk_u32(a) (__blsmsk_u32((a))) 100 101 /// \brief Clears the least siginificant bit that is set to 1 in the source 102 /// operand and returns the result. 103 /// 104 /// \headerfile <x86intrin.h> 105 /// 106 /// \code 107 /// unsigned int _blsr_u32(unsigned int a); 108 /// \endcode 109 /// 110 /// This intrinsic corresponds to the \c BLSR instruction. 111 /// 112 /// \param a 113 /// An unsigned integer containing the operand to be cleared. 114 /// \returns An unsigned integer containing the result of clearing the source 115 /// operand. 116 #define _blsr_u32(a) (__blsr_u32((a))) 117 118 /// \brief Counts the number of trailing zero bits in the operand. 119 /// 120 /// \headerfile <x86intrin.h> 121 /// 122 /// \code 123 /// unsigned int _tzcnt_u32(unsigned int a); 124 /// \endcode 125 /// 126 /// This intrinsic corresponds to the \c TZCNT instruction. 127 /// 128 /// \param a 129 /// An unsigned 32-bit integer whose trailing zeros are to be counted. 130 /// \returns An unsigned 32-bit integer containing the number of trailing zero 131 /// bits in the operand. 132 #define _tzcnt_u32(a) (__tzcnt_u32((a))) 133 134 /* Define the default attributes for the functions in this file. */ 135 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) 136 137 /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT 138 instruction behaves as BSF on non-BMI targets, there is code that expects 139 to use it as a potentially faster version of BSF. */ 140 #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) 141 142 /// \brief Counts the number of trailing zero bits in the operand. 143 /// 144 /// \headerfile <x86intrin.h> 145 /// 146 /// This intrinsic corresponds to the \c TZCNT instruction. 147 /// 148 /// \param __X 149 /// An unsigned 16-bit integer whose trailing zeros are to be counted. 
150 /// \returns An unsigned 16-bit integer containing the number of trailing zero 151 /// bits in the operand. 152 static __inline__ unsigned short __RELAXED_FN_ATTRS 153 __tzcnt_u16(unsigned short __X) 154 { 155 return __X ? __builtin_ctzs(__X) : 16; 156 } 157 158 /// \brief Performs a bitwise AND of the second operand with the one's 159 /// complement of the first operand. 160 /// 161 /// \headerfile <x86intrin.h> 162 /// 163 /// This intrinsic corresponds to the \c ANDN instruction. 164 /// 165 /// \param __X 166 /// An unsigned integer containing one of the operands. 167 /// \param __Y 168 /// An unsigned integer containing one of the operands. 169 /// \returns An unsigned integer containing the bitwise AND of the second 170 /// operand with the one's complement of the first operand. 171 static __inline__ unsigned int __DEFAULT_FN_ATTRS 172 __andn_u32(unsigned int __X, unsigned int __Y) 173 { 174 return ~__X & __Y; 175 } 176 177 /* AMD-specified, double-leading-underscore version of BEXTR */ 178 /// \brief Extracts the specified bits from the first operand and returns them 179 /// in the least significant bits of the result. 180 /// 181 /// \headerfile <x86intrin.h> 182 /// 183 /// This intrinsic corresponds to the \c BEXTR instruction. 184 /// 185 /// \param __X 186 /// An unsigned integer whose bits are to be extracted. 187 /// \param __Y 188 /// An unsigned integer used to specify which bits are extracted. Bits [7:0] 189 /// specify the index of the least significant bit. Bits [15:8] specify the 190 /// number of bits to be extracted. 191 /// \returns An unsigned integer whose least significant bits contain the 192 /// extracted bits. 
193 static __inline__ unsigned int __DEFAULT_FN_ATTRS 194 __bextr_u32(unsigned int __X, unsigned int __Y) 195 { 196 return __builtin_ia32_bextr_u32(__X, __Y); 197 } 198 199 /* Intel-specified, single-leading-underscore version of BEXTR */ 200 /// \brief Extracts the specified bits from the first operand and returns them 201 /// in the least significant bits of the result. 202 /// 203 /// \headerfile <x86intrin.h> 204 /// 205 /// This intrinsic corresponds to the \c BEXTR instruction. 206 /// 207 /// \param __X 208 /// An unsigned integer whose bits are to be extracted. 209 /// \param __Y 210 /// An unsigned integer used to specify the index of the least significant 211 /// bit for the bits to be extracted. Bits [7:0] specify the index. 212 /// \param __Z 213 /// An unsigned integer used to specify the number of bits to be extracted. 214 /// Bits [7:0] specify the number of bits. 215 /// \returns An unsigned integer whose least significant bits contain the 216 /// extracted bits. 217 static __inline__ unsigned int __DEFAULT_FN_ATTRS 218 _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) 219 { 220 return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); 221 } 222 223 /// \brief Clears all bits in the source except for the least significant bit 224 /// containing a value of 1 and returns the result. 225 /// 226 /// \headerfile <x86intrin.h> 227 /// 228 /// This intrinsic corresponds to the \c BLSI instruction. 229 /// 230 /// \param __X 231 /// An unsigned integer whose bits are to be cleared. 232 /// \returns An unsigned integer containing the result of clearing the bits from 233 /// the source operand. 234 static __inline__ unsigned int __DEFAULT_FN_ATTRS 235 __blsi_u32(unsigned int __X) 236 { 237 return __X & -__X; 238 } 239 240 /// \brief Creates a mask whose bits are set to 1, using bit 0 up to and 241 /// including the least siginificant bit that is set to 1 in the source 242 /// operand and returns the result. 
243 /// 244 /// \headerfile <x86intrin.h> 245 /// 246 /// This intrinsic corresponds to the \c BLSMSK instruction. 247 /// 248 /// \param __X 249 /// An unsigned integer used to create the mask. 250 /// \returns An unsigned integer containing the newly created mask. 251 static __inline__ unsigned int __DEFAULT_FN_ATTRS 252 __blsmsk_u32(unsigned int __X) 253 { 254 return __X ^ (__X - 1); 255 } 256 257 /// \brief Clears the least siginificant bit that is set to 1 in the source 258 /// operand and returns the result. 259 /// 260 /// \headerfile <x86intrin.h> 261 /// 262 /// This intrinsic corresponds to the \c BLSR instruction. 263 /// 264 /// \param __X 265 /// An unsigned integer containing the operand to be cleared. 266 /// \returns An unsigned integer containing the result of clearing the source 267 /// operand. 268 static __inline__ unsigned int __DEFAULT_FN_ATTRS 269 __blsr_u32(unsigned int __X) 270 { 271 return __X & (__X - 1); 272 } 273 274 /// \brief Counts the number of trailing zero bits in the operand. 275 /// 276 /// \headerfile <x86intrin.h> 277 /// 278 /// This intrinsic corresponds to the \c TZCNT instruction. 279 /// 280 /// \param __X 281 /// An unsigned 32-bit integer whose trailing zeros are to be counted. 282 /// \returns An unsigned 32-bit integer containing the number of trailing zero 283 /// bits in the operand. 284 static __inline__ unsigned int __RELAXED_FN_ATTRS 285 __tzcnt_u32(unsigned int __X) 286 { 287 return __X ? __builtin_ctz(__X) : 32; 288 } 289 290 #ifdef __x86_64__ 291 292 /// \brief Performs a bitwise AND of the second operand with the one's 293 /// complement of the first operand. 294 /// 295 /// \headerfile <x86intrin.h> 296 /// 297 /// \code 298 /// unsigned long long _andn_u64 (unsigned long long a, unsigned long long b); 299 /// \endcode 300 /// 301 /// This intrinsic corresponds to the \c ANDN instruction. 302 /// 303 /// \param a 304 /// An unsigned 64-bit integer containing one of the operands. 
/// \param b
///    An unsigned 64-bit integer that is ANDed with the one's complement of
///    the first operand.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
///    operand with the one's complement of the first operand.
#define _andn_u64(a, b)   (__andn_u64((a), (b)))

/* No alias macro for BEXTR: _bextr_u64 != __bextr_u64 */

/// \brief Clears all bits in the source except for the least significant bit
///    containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsi_u64(unsigned long long a);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param a
///    An unsigned 64-bit integer whose bits are to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
///    bits from the source operand.
#define _blsi_u64(a)      (__blsi_u64((a)))

/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
///    including the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsmsk_u64(unsigned long long a);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param a
///    An unsigned 64-bit integer used to create the mask.
/// \returns An unsigned 64-bit integer containing the newly created mask.
#define _blsmsk_u64(a)    (__blsmsk_u64((a)))

/// \brief Clears the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsr_u64(unsigned long long a);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param a
///    An unsigned 64-bit integer containing the operand to be cleared.
359 /// \returns An unsigned 64-bit integer containing the result of clearing the 360 /// source operand. 361 #define _blsr_u64(a) (__blsr_u64((a))) 362 363 /// \brief Counts the number of trailing zero bits in the operand. 364 /// 365 /// \headerfile <x86intrin.h> 366 /// 367 /// \code 368 /// unsigned long long _tzcnt_u64(unsigned long long a); 369 /// \endcode 370 /// 371 /// This intrinsic corresponds to the \c TZCNT instruction. 372 /// 373 /// \param a 374 /// An unsigned 64-bit integer whose trailing zeros are to be counted. 375 /// \returns An unsigned 64-bit integer containing the number of trailing zero 376 /// bits in the operand. 377 #define _tzcnt_u64(a) (__tzcnt_u64((a))) 378 379 /// \brief Performs a bitwise AND of the second operand with the one's 380 /// complement of the first operand. 381 /// 382 /// \headerfile <x86intrin.h> 383 /// 384 /// This intrinsic corresponds to the \c ANDN instruction. 385 /// 386 /// \param __X 387 /// An unsigned 64-bit integer containing one of the operands. 388 /// \param __Y 389 /// An unsigned 64-bit integer containing one of the operands. 390 /// \returns An unsigned 64-bit integer containing the bitwise AND of the second 391 /// operand with the one's complement of the first operand. 392 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 393 __andn_u64 (unsigned long long __X, unsigned long long __Y) 394 { 395 return ~__X & __Y; 396 } 397 398 /* AMD-specified, double-leading-underscore version of BEXTR */ 399 /// \brief Extracts the specified bits from the first operand and returns them 400 /// in the least significant bits of the result. 401 /// 402 /// \headerfile <x86intrin.h> 403 /// 404 /// This intrinsic corresponds to the \c BEXTR instruction. 405 /// 406 /// \param __X 407 /// An unsigned 64-bit integer whose bits are to be extracted. 408 /// \param __Y 409 /// An unsigned 64-bit integer used to specify which bits are extracted. Bits 410 /// [7:0] specify the index of the least significant bit. 
Bits [15:8] specify 411 /// the number of bits to be extracted. 412 /// \returns An unsigned 64-bit integer whose least significant bits contain the 413 /// extracted bits. 414 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 415 __bextr_u64(unsigned long long __X, unsigned long long __Y) 416 { 417 return __builtin_ia32_bextr_u64(__X, __Y); 418 } 419 420 /* Intel-specified, single-leading-underscore version of BEXTR */ 421 /// \brief Extracts the specified bits from the first operand and returns them 422 /// in the least significant bits of the result. 423 /// 424 /// \headerfile <x86intrin.h> 425 /// 426 /// This intrinsic corresponds to the \c BEXTR instruction. 427 /// 428 /// \param __X 429 /// An unsigned 64-bit integer whose bits are to be extracted. 430 /// \param __Y 431 /// An unsigned integer used to specify the index of the least significant 432 /// bit for the bits to be extracted. Bits [7:0] specify the index. 433 /// \param __Z 434 /// An unsigned integer used to specify the number of bits to be extracted. 435 /// Bits [7:0] specify the number of bits. 436 /// \returns An unsigned 64-bit integer whose least significant bits contain the 437 /// extracted bits. 438 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 439 _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) 440 { 441 return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); 442 } 443 444 /// \brief Clears all bits in the source except for the least significant bit 445 /// containing a value of 1 and returns the result. 446 /// 447 /// \headerfile <x86intrin.h> 448 /// 449 /// This intrinsic corresponds to the \c BLSI instruction. 450 /// 451 /// \param __X 452 /// An unsigned 64-bit integer whose bits are to be cleared. 453 /// \returns An unsigned 64-bit integer containing the result of clearing the 454 /// bits from the source operand. 
455 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 456 __blsi_u64(unsigned long long __X) 457 { 458 return __X & -__X; 459 } 460 461 /// \brief Creates a mask whose bits are set to 1, using bit 0 up to and 462 /// including the least siginificant bit that is set to 1 in the source 463 /// operand and returns the result. 464 /// 465 /// \headerfile <x86intrin.h> 466 /// 467 /// This intrinsic corresponds to the \c BLSMSK instruction. 468 /// 469 /// \param __X 470 /// An unsigned 64-bit integer used to create the mask. 471 /// \returns A unsigned 64-bit integer containing the newly created mask. 472 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 473 __blsmsk_u64(unsigned long long __X) 474 { 475 return __X ^ (__X - 1); 476 } 477 478 /// \brief Clears the least siginificant bit that is set to 1 in the source 479 /// operand and returns the result. 480 /// 481 /// \headerfile <x86intrin.h> 482 /// 483 /// This intrinsic corresponds to the \c BLSR instruction. 484 /// 485 /// \param __X 486 /// An unsigned 64-bit integer containing the operand to be cleared. 487 /// \returns An unsigned 64-bit integer containing the result of clearing the 488 /// source operand. 489 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 490 __blsr_u64(unsigned long long __X) 491 { 492 return __X & (__X - 1); 493 } 494 495 /// \brief Counts the number of trailing zero bits in the operand. 496 /// 497 /// \headerfile <x86intrin.h> 498 /// 499 /// This intrinsic corresponds to the \c TZCNT instruction. 500 /// 501 /// \param __X 502 /// An unsigned 64-bit integer whose trailing zeros are to be counted. 503 /// \returns An unsigned 64-bit integer containing the number of trailing zero 504 /// bits in the operand. 505 static __inline__ unsigned long long __RELAXED_FN_ATTRS 506 __tzcnt_u64(unsigned long long __X) 507 { 508 return __X ? 
__builtin_ctzll(__X) : 64; 509 } 510 511 #endif /* __x86_64__ */ 512 513 #undef __DEFAULT_FN_ATTRS 514 #undef __RELAXED_FN_ATTRS 515 516 #endif /* __BMIINTRIN_H */ 517