; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=X32-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=X64-CLZ

declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)

declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)

define i8 @cttz_i8(i8 %x) {
; X32-LABEL: cttz_i8:
; X32: # %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: bsfl %eax, %eax
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; X64-LABEL: cttz_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsfl %eax, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i8:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: tzcntl %eax, %eax
; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i8:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: tzcntl %eax, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
  ret i8 %tmp
}

define i16 @cttz_i16(i16 %x) {
; X32-LABEL: cttz_i16:
; X32: # %bb.0:
; X32-NEXT: bsfw {{[0-9]+}}(%esp), %ax
; X32-NEXT: retl
;
; X64-LABEL: cttz_i16:
; X64: # %bb.0:
; X64-NEXT: bsfw %di, %ax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i16:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i16:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntw %di, %ax
; X64-CLZ-NEXT: retq
  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
  ret i16 %tmp
}

define i32 @cttz_i32(i32 %x) {
; X32-LABEL: cttz_i32:
; X32: # %bb.0:
; X32-NEXT: bsfl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
;
; X64-LABEL: cttz_i32:
; X64: # %bb.0:
; X64-NEXT: bsfl %edi, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i32:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i32:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntl %edi, %eax
; X64-CLZ-NEXT: retq
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

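; On i686 the 64-bit count is expanded into two 32-bit counts: cttz of the low
; half if it is non-zero, otherwise 32 plus cttz of the high half, with the high
; half of the result zeroed.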
define i64 @cttz_i64(i64 %x) {
; X32-LABEL: cttz_i64:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: testl %eax, %eax
; X32-NEXT: jne .LBB3_1
; X32-NEXT: # %bb.2:
; X32-NEXT: bsfl {{[0-9]+}}(%esp), %eax
; X32-NEXT: addl $32, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
; X32-NEXT: .LBB3_1:
; X32-NEXT: bsfl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: cttz_i64:
; X64: # %bb.0:
; X64-NEXT: bsfq %rdi, %rax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i64:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: testl %eax, %eax
; X32-CLZ-NEXT: jne .LBB3_1
; X32-CLZ-NEXT: # %bb.2:
; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: addl $32, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
; X32-CLZ-NEXT: .LBB3_1:
; X32-CLZ-NEXT: tzcntl %eax, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i64:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

define i8 @ctlz_i8(i8 %x) {
; X32-LABEL: ctlz_i8:
; X32: # %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $7, %eax
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i8:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: lzcntl %eax, %eax
; X32-CLZ-NEXT: addl $-24, %eax
; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i8:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: lzcntl %eax, %eax
; X64-CLZ-NEXT: addl $-24, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
  ret i8 %tmp2
}

define i16 @ctlz_i16(i16 %x) {
; X32-LABEL: ctlz_i16:
; X32: # %bb.0:
; X32-NEXT: bsrw {{[0-9]+}}(%esp), %ax
; X32-NEXT: xorl $15, %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i16:
; X64: # %bb.0:
; X64-NEXT: bsrw %di, %ax
; X64-NEXT: xorl $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i16:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i16:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntw %di, %ax
; X64-CLZ-NEXT: retq
  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
  ret i16 %tmp2
}

define i32 @ctlz_i32(i32 %x) {
; X32-LABEL: ctlz_i32:
; X32: # %bb.0:
; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i32:
; X64: # %bb.0:
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i32:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i32:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: retq
  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

define i64 @ctlz_i64(i64 %x) {
; X32-LABEL: ctlz_i64:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: testl %eax, %eax
; X32-NEXT: jne .LBB7_1
; X32-NEXT: # %bb.2:
; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: addl $32, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
; X32-NEXT: .LBB7_1:
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i64:
; X64: # %bb.0:
; X64-NEXT: bsrq %rdi, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i64:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: testl %eax, %eax
; X32-CLZ-NEXT: jne .LBB7_1
; X32-CLZ-NEXT: # %bb.2:
; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: addl $32, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
; X32-CLZ-NEXT: .LBB7_1:
; X32-CLZ-NEXT: lzcntl %eax, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i64:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i8 @ctlz_i8_zero_test(i8 %n) {
; X32-LABEL: ctlz_i8_zero_test:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: testb %al, %al
; X32-NEXT: je .LBB8_1
; X32-NEXT: # %bb.2: # %cond.false
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $7, %eax
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
; X32-NEXT: .LBB8_1:
; X32-NEXT: movb $8, %al
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i8_zero_test:
; X64: # %bb.0:
; X64-NEXT: testb %dil, %dil
; X64-NEXT: je .LBB8_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
; X64-NEXT: .LBB8_1:
; X64-NEXT: movb $8, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i8_zero_test:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: lzcntl %eax, %eax
; X32-CLZ-NEXT: addl $-24, %eax
; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i8_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: lzcntl %eax, %eax
; X64-CLZ-NEXT: addl $-24, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
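; The -CLZ runs need no zero check: lzcnt returns the operand width for a zero
; input, and in the i8 case above lzcnt of the zero-extended byte minus 24 still
; yields 8 for a zero value.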
define i16 @ctlz_i16_zero_test(i16 %n) {
; X32-LABEL: ctlz_i16_zero_test:
; X32: # %bb.0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: testw %ax, %ax
; X32-NEXT: je .LBB9_1
; X32-NEXT: # %bb.2: # %cond.false
; X32-NEXT: bsrw %ax, %ax
; X32-NEXT: xorl $15, %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
; X32-NEXT: .LBB9_1:
; X32-NEXT: movw $16, %ax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i16_zero_test:
; X64: # %bb.0:
; X64-NEXT: testw %di, %di
; X64-NEXT: je .LBB9_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrw %di, %ax
; X64-NEXT: xorl $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_1:
; X64-NEXT: movw $16, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i16_zero_test:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i16_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntw %di, %ax
; X64-CLZ-NEXT: retq
  %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i32 @ctlz_i32_zero_test(i32 %n) {
; X32-LABEL: ctlz_i32_zero_test:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: testl %eax, %eax
; X32-NEXT: je .LBB10_1
; X32-NEXT: # %bb.2: # %cond.false
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: retl
; X32-NEXT: .LBB10_1:
; X32-NEXT: movl $32, %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i32_zero_test:
; X64: # %bb.0:
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB10_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB10_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i32_zero_test:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i32_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: retq
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
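; The i686 expansion of 64-bit ctlz mirrors the cttz case: ctlz of the high half
; if it is non-zero, otherwise 32 plus ctlz of the low half; the xorl $31 turns
; bsr's bit index into a leading-zero count.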
define i64 @ctlz_i64_zero_test(i64 %n) {
; X32-LABEL: ctlz_i64_zero_test:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: bsrl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl $63, %eax
; X32-NEXT: je .LBB11_2
; X32-NEXT: # %bb.1:
; X32-NEXT: movl %edx, %eax
; X32-NEXT: .LBB11_2:
; X32-NEXT: testl %ecx, %ecx
; X32-NEXT: jne .LBB11_3
; X32-NEXT: # %bb.4:
; X32-NEXT: xorl $31, %eax
; X32-NEXT: addl $32, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
; X32-NEXT: .LBB11_3:
; X32-NEXT: bsrl %ecx, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i64_zero_test:
; X64: # %bb.0:
; X64-NEXT: testq %rdi, %rdi
; X64-NEXT: je .LBB11_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrq %rdi, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
; X64-NEXT: .LBB11_1:
; X64-NEXT: movl $64, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i64_zero_test:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: testl %eax, %eax
; X32-CLZ-NEXT: jne .LBB11_1
; X32-CLZ-NEXT: # %bb.2:
; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: addl $32, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
; X32-CLZ-NEXT: .LBB11_1:
; X32-CLZ-NEXT: lzcntl %eax, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i8 @cttz_i8_zero_test(i8 %n) {
; X32-LABEL: cttz_i8_zero_test:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: testb %al, %al
; X32-NEXT: je .LBB12_1
; X32-NEXT: # %bb.2: # %cond.false
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: bsfl %eax, %eax
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
; X32-NEXT: .LBB12_1:
; X32-NEXT: movb $8, %al
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; X64-LABEL: cttz_i8_zero_test:
; X64: # %bb.0:
; X64-NEXT: testb %dil, %dil
; X64-NEXT: je .LBB12_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsfl %eax, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
; X64-NEXT: .LBB12_1:
; X64-NEXT: movb $8, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i8_zero_test:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: orl $256, %eax # imm = 0x100
; X32-CLZ-NEXT: tzcntl %eax, %eax
; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i8_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: orl $256, %eax # imm = 0x100
; X64-CLZ-NEXT: tzcntl %eax, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
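; Likewise, tzcnt returns the operand width for a zero input, so the -CLZ runs
; need no branch; the i8 case above instead sets bit 8 (orl $256) before a
; 32-bit tzcnt so that a zero byte yields 8.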
define i16 @cttz_i16_zero_test(i16 %n) {
; X32-LABEL: cttz_i16_zero_test:
; X32: # %bb.0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: testw %ax, %ax
; X32-NEXT: je .LBB13_1
; X32-NEXT: # %bb.2: # %cond.false
; X32-NEXT: bsfw %ax, %ax
; X32-NEXT: retl
; X32-NEXT: .LBB13_1:
; X32-NEXT: movw $16, %ax
; X32-NEXT: retl
;
; X64-LABEL: cttz_i16_zero_test:
; X64: # %bb.0:
; X64-NEXT: testw %di, %di
; X64-NEXT: je .LBB13_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsfw %di, %ax
; X64-NEXT: retq
; X64-NEXT: .LBB13_1:
; X64-NEXT: movw $16, %ax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i16_zero_test:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i16_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntw %di, %ax
; X64-CLZ-NEXT: retq
  %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i32 @cttz_i32_zero_test(i32 %n) {
; X32-LABEL: cttz_i32_zero_test:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: testl %eax, %eax
; X32-NEXT: je .LBB14_1
; X32-NEXT: # %bb.2: # %cond.false
; X32-NEXT: bsfl %eax, %eax
; X32-NEXT: retl
; X32-NEXT: .LBB14_1:
; X32-NEXT: movl $32, %eax
; X32-NEXT: retl
;
; X64-LABEL: cttz_i32_zero_test:
; X64: # %bb.0:
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB14_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsfl %edi, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB14_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i32_zero_test:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i32_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntl %edi, %eax
; X64-CLZ-NEXT: retq
  %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
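; With cttz(0) defined, the i686 expansion below reuses the ZF that bsf sets for
; a zero source to keep a preloaded 32, so an all-zero input composes to
; 32 + 32 = 64 without an extra compare.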
define i64 @cttz_i64_zero_test(i64 %n) {
; X32-LABEL: cttz_i64_zero_test:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: bsfl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl $32, %eax
; X32-NEXT: je .LBB15_2
; X32-NEXT: # %bb.1:
; X32-NEXT: movl %edx, %eax
; X32-NEXT: .LBB15_2:
; X32-NEXT: testl %ecx, %ecx
; X32-NEXT: jne .LBB15_3
; X32-NEXT: # %bb.4:
; X32-NEXT: addl $32, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
; X32-NEXT: .LBB15_3:
; X32-NEXT: bsfl %ecx, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: cttz_i64_zero_test:
; X64: # %bb.0:
; X64-NEXT: testq %rdi, %rdi
; X64-NEXT: je .LBB15_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsfq %rdi, %rax
; X64-NEXT: retq
; X64-NEXT: .LBB15_1:
; X64-NEXT: movl $64, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i64_zero_test:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: testl %eax, %eax
; X32-CLZ-NEXT: jne .LBB15_1
; X32-CLZ-NEXT: # %bb.2:
; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: addl $32, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
; X32-CLZ-NEXT: .LBB15_1:
; X32-CLZ-NEXT: tzcntl %eax, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
  %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
; rdar://9490949
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
; codegen doesn't know how to delete the movl and je.
define i32 @ctlz_i32_fold_cmov(i32 %n) {
; X32-LABEL: ctlz_i32_fold_cmov:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: orl $1, %eax
; X32-NEXT: je .LBB16_1
; X32-NEXT: # %bb.2: # %cond.false
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: retl
; X32-NEXT: .LBB16_1:
; X32-NEXT: movl $32, %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i32_fold_cmov:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: je .LBB16_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB16_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i32_fold_cmov:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: orl $1, %eax
; X32-CLZ-NEXT: lzcntl %eax, %eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orl $1, %edi
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: retq
  %or = or i32 %n, 1
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
  ret i32 %tmp1
}

; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
; the most significant bit, which is what 'bsr' does natively.
; FIXME: We should probably select BSR instead of LZCNT in these circumstances.
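; For non-zero x, bsr(x) == 31 - ctlz(x), and since ctlz(x) <= 31 that equals
; ctlz(x) ^ 31, so the xor below folds into bsr on the BSR runs but stays as an
; explicit xorl after lzcnt.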
define i32 @ctlz_bsr(i32 %n) {
; X32-LABEL: ctlz_bsr:
; X32: # %bb.0:
; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_bsr:
; X64: # %bb.0:
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_bsr:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: xorl $31, %eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_bsr:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: xorl $31, %eax
; X64-CLZ-NEXT: retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
; codegen doesn't know how to combine the $32 and $31 into $63.
define i32 @ctlz_bsr_zero_test(i32 %n) {
; X32-LABEL: ctlz_bsr_zero_test:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: testl %eax, %eax
; X32-NEXT: je .LBB18_1
; X32-NEXT: # %bb.2: # %cond.false
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: retl
; X32-NEXT: .LBB18_1:
; X32-NEXT: movl $32, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_bsr_zero_test:
; X64: # %bb.0:
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB18_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB18_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_bsr_zero_test:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: xorl $31, %eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_bsr_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: xorl $31, %eax
; X64-CLZ-NEXT: retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

define i8 @cttz_i8_knownbits(i8 %x) {
; X32-LABEL: cttz_i8_knownbits:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: orb $2, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: bsfl %eax, %eax
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; X64-LABEL: cttz_i8_knownbits:
; X64: # %bb.0:
; X64-NEXT: orb $2, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsfl %eax, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i8_knownbits:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-CLZ-NEXT: orb $2, %al
; X32-CLZ-NEXT: movzbl %al, %eax
; X32-CLZ-NEXT: tzcntl %eax, %eax
; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i8_knownbits:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orb $2, %dil
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: tzcntl %eax, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %x2 = or i8 %x, 2
  %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

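; Forcing bit 1 on (above) or bit 6 (below) bounds the count to 0 or 1, so the
; trailing 'and i8 %tmp, 1' is known to be a no-op and produces no instruction.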
define i8 @ctlz_i8_knownbits(i8 %x) {
; X32-LABEL: ctlz_i8_knownbits:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: orb $64, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $7, %eax
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i8_knownbits:
; X64: # %bb.0:
; X64-NEXT: orb $64, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i8_knownbits:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-CLZ-NEXT: orb $64, %al
; X32-CLZ-NEXT: movzbl %al, %eax
; X32-CLZ-NEXT: lzcntl %eax, %eax
; X32-CLZ-NEXT: addl $-24, %eax
; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i8_knownbits:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orb $64, %dil
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: lzcntl %eax, %eax
; X64-CLZ-NEXT: addl $-24, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq

  %x2 = or i8 %x, 64
  %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

; Make sure we can detect that the input is non-zero and avoid cmov after BSR
; This is relevant for 32-bit mode without lzcnt
define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
; X32-LABEL: ctlz_i64_zero_test_knownneverzero:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: testl %eax, %eax
; X32-NEXT: jne .LBB21_1
; X32-NEXT: # %bb.2:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: orl $1, %eax
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: orl $32, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
; X32-NEXT: .LBB21_1:
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64: # %bb.0:
; X64-NEXT: orq $1, %rdi
; X64-NEXT: je .LBB21_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrq %rdi, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
; X64-NEXT: .LBB21_1:
; X64-NEXT: movl $64, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: testl %eax, %eax
; X32-CLZ-NEXT: jne .LBB21_1
; X32-CLZ-NEXT: # %bb.2:
; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: orl $1, %eax
; X32-CLZ-NEXT: lzcntl %eax, %eax
; X32-CLZ-NEXT: orl $32, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
; X32-CLZ-NEXT: .LBB21_1:
; X32-CLZ-NEXT: lzcntl %eax, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orq $1, %rdi
; X64-CLZ-NEXT: lzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
  %o = or i64 %n, 1
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}

; Make sure we can detect that the input is non-zero and avoid cmov after BSF
; This is relevant for 32-bit mode without tzcnt
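; Or'ing in the sign bit makes the high half provably non-zero, so the bsf/tzcnt
; on that path needs no cmov or extra branch for a zero source.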
define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
; X32-LABEL: cttz_i64_zero_test_knownneverzero:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: testl %eax, %eax
; X32-NEXT: jne .LBB22_1
; X32-NEXT: # %bb.2:
; X32-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-NEXT: bsfl %eax, %eax
; X32-NEXT: orl $32, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
; X32-NEXT: .LBB22_1:
; X32-NEXT: bsfl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: cttz_i64_zero_test_knownneverzero:
; X64: # %bb.0:
; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT: orq %rdi, %rax
; X64-NEXT: je .LBB22_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsfq %rax, %rax
; X64-NEXT: retq
; X64-NEXT: .LBB22_1:
; X64-NEXT: movl $64, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X32-CLZ: # %bb.0:
; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: testl %eax, %eax
; X32-CLZ-NEXT: jne .LBB22_1
; X32-CLZ-NEXT: # %bb.2:
; X32-CLZ-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X32-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: tzcntl %eax, %eax
; X32-CLZ-NEXT: orl $32, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
; X32-CLZ-NEXT: .LBB22_1:
; X32-CLZ-NEXT: tzcntl %eax, %eax
; X32-CLZ-NEXT: xorl %edx, %edx
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-CLZ-NEXT: orq %rdi, %rax
; X64-CLZ-NEXT: tzcntq %rax, %rax
; X64-CLZ-NEXT: retq
  %o = or i64 %n, -9223372036854775808 ; 0x8000000000000000
  %tmp1 = call i64 @llvm.cttz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}