; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=X32-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=X64-CLZ
      6 
; Intrinsics exercised by this test: count-trailing-zeros and
; count-leading-zeros at each integer width. The trailing i1 flag at each
; call site below selects whether a zero input is undefined (true) or must
; return the bit width (false).
declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)

declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
     16 
; cttz with zero-is-undef (i1 true): no zero-input guard is needed, so a
; bare bsf (tzcnt with +bmi) is expected. The i8 operand is widened via
; movzbl and the result truncated back to %al.
define i8 @cttz_i8(i8 %x)  {
; X32-LABEL: cttz_i8:
; X32:       # %bb.0:
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i8:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsfl %eax, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i8:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    tzcntl %eax, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
  ret i8 %tmp
}
     48 
; 16-bit cttz, zero-is-undef: bsfw/tzcntw operate directly on the word
; argument (memory operand on 32-bit, %di on 64-bit).
define i16 @cttz_i16(i16 %x)  {
; X32-LABEL: cttz_i16:
; X32:       # %bb.0:
; X32-NEXT:    bsfw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i16:
; X64:       # %bb.0:
; X64-NEXT:    bsfw %di, %ax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i16:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i16:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntw %di, %ax
; X64-CLZ-NEXT:    retq
  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
  ret i16 %tmp
}
     72 
; 32-bit cttz, zero-is-undef: a single bsfl/tzcntl with no zero check.
define i32 @cttz_i32(i32 %x)  {
; X32-LABEL: cttz_i32:
; X32:       # %bb.0:
; X32-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i32:
; X64:       # %bb.0:
; X64-NEXT:    bsfl %edi, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i32:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
  ret i32 %tmp
}
     96 
; 64-bit cttz, zero-is-undef. On 32-bit targets the i64 is split: test the
; low half; if it is zero, count the high half and add 32; either way the
; high result word %edx is zeroed. 64-bit targets use one bsfq/tzcntq.
define i64 @cttz_i64(i64 %x)  {
; X32-LABEL: cttz_i64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    jne .LBB3_1
; X32-NEXT:  # %bb.2:
; X32-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    addl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB3_1:
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i64:
; X64:       # %bb.0:
; X64-NEXT:    bsfq %rdi, %rax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i64:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB3_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    addl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB3_1:
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
  ret i64 %tmp
}
    140 
; 8-bit ctlz, zero-is-undef: bsrl of the zero-extended byte, then xor $7
; converts the bit index into a leading-zero count. The lzcnt path counts
; over 32 bits and adjusts by -24 (= 8 - 32) instead.
define i8 @ctlz_i8(i8 %x) {
; X32-LABEL: ctlz_i8:
; X32:       # %bb.0:
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $7, %eax
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i8:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsrl %eax, %eax
; X64-NEXT:    xorl $7, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i8:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    addl $-24, %eax
; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i8:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    lzcntl %eax, %eax
; X64-CLZ-NEXT:    addl $-24, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
  ret i8 %tmp2
}
    176 
; 16-bit ctlz, zero-is-undef: bsrw then xor $15 to flip index into count;
; lzcntw needs no adjustment.
define i16 @ctlz_i16(i16 %x) {
; X32-LABEL: ctlz_i16:
; X32:       # %bb.0:
; X32-NEXT:    bsrw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    xorl $15, %eax
; X32-NEXT:    # kill: def $ax killed $ax killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i16:
; X64:       # %bb.0:
; X64-NEXT:    bsrw %di, %ax
; X64-NEXT:    xorl $15, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i16:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i16:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntw %di, %ax
; X64-CLZ-NEXT:    retq
  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
  ret i16 %tmp2
}
    204 
; 32-bit ctlz, zero-is-undef: bsrl + xor $31, or a single lzcntl.
define i32 @ctlz_i32(i32 %x) {
; X32-LABEL: ctlz_i32:
; X32:       # %bb.0:
; X32-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i32:
; X64:       # %bb.0:
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i32:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i32:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
  ret i32 %tmp
}
    230 
; 64-bit ctlz, zero-is-undef. On 32-bit targets: test the high half; if it
; is zero, count the low half and add 32; the upper result word %edx is
; always zeroed. 64-bit targets use one bsrq+xorq $63 (or lzcntq alone).
define i64 @ctlz_i64(i64 %x) {
; X32-LABEL: ctlz_i64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    jne .LBB7_1
; X32-NEXT:  # %bb.2:
; X32-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    addl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB7_1:
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i64:
; X64:       # %bb.0:
; X64-NEXT:    bsrq %rdi, %rax
; X64-NEXT:    xorq $63, %rax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i64:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB7_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    addl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB7_1:
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i64:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
  ret i64 %tmp
}
    277 
    278 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
    279 define i8 @ctlz_i8_zero_test(i8 %n) {
    280 ; X32-LABEL: ctlz_i8_zero_test:
    281 ; X32:       # %bb.0:
    282 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
    283 ; X32-NEXT:    testb %al, %al
    284 ; X32-NEXT:    je .LBB8_1
    285 ; X32-NEXT:  # %bb.2: # %cond.false
    286 ; X32-NEXT:    movzbl %al, %eax
    287 ; X32-NEXT:    bsrl %eax, %eax
    288 ; X32-NEXT:    xorl $7, %eax
    289 ; X32-NEXT:    # kill: def $al killed $al killed $eax
    290 ; X32-NEXT:    retl
    291 ; X32-NEXT:  .LBB8_1:
    292 ; X32-NEXT:    movb    $8, %al
    293 ; X32-NEXT:    # kill: def $al killed $al killed $eax
    294 ; X32-NEXT:    retl
    295 ;
    296 ; X64-LABEL: ctlz_i8_zero_test:
    297 ; X64:       # %bb.0:
    298 ; X64-NEXT:    testb %dil, %dil
    299 ; X64-NEXT:    je .LBB8_1
    300 ; X64-NEXT:  # %bb.2: # %cond.false
    301 ; X64-NEXT:    movzbl %dil, %eax
    302 ; X64-NEXT:    bsrl %eax, %eax
    303 ; X64-NEXT:    xorl $7, %eax
    304 ; X64-NEXT:    # kill: def $al killed $al killed $eax
    305 ; X64-NEXT:    retq
    306 ; X64-NEXT:  .LBB8_1:
    307 ; X64-NEXT:    movb    $8, %al
    308 ; X64-NEXT:    # kill: def $al killed $al killed $eax
    309 ; X64-NEXT:    retq
    310 ;
    311 ; X32-CLZ-LABEL: ctlz_i8_zero_test:
    312 ; X32-CLZ:       # %bb.0:
    313 ; X32-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
    314 ; X32-CLZ-NEXT:    lzcntl %eax, %eax
    315 ; X32-CLZ-NEXT:    addl $-24, %eax
    316 ; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
    317 ; X32-CLZ-NEXT:    retl
    318 ;
    319 ; X64-CLZ-LABEL: ctlz_i8_zero_test:
    320 ; X64-CLZ:       # %bb.0:
    321 ; X64-CLZ-NEXT:    movzbl %dil, %eax
    322 ; X64-CLZ-NEXT:    lzcntl %eax, %eax
    323 ; X64-CLZ-NEXT:    addl $-24, %eax
    324 ; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
    325 ; X64-CLZ-NEXT:    retq
    326   %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
    327   ret i8 %tmp1
    328 }
    329 
    330 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
    331 define i16 @ctlz_i16_zero_test(i16 %n) {
    332 ; X32-LABEL: ctlz_i16_zero_test:
    333 ; X32:       # %bb.0:
    334 ; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    335 ; X32-NEXT:    testw %ax, %ax
    336 ; X32-NEXT:    je .LBB9_1
    337 ; X32-NEXT:  # %bb.2: # %cond.false
    338 ; X32-NEXT:    bsrw %ax, %ax
    339 ; X32-NEXT:    xorl $15, %eax
    340 ; X32-NEXT:    # kill: def $ax killed $ax killed $eax
    341 ; X32-NEXT:    retl
    342 ; X32-NEXT:  .LBB9_1:
    343 ; X32-NEXT:    movw    $16, %ax
    344 ; X32-NEXT:    # kill: def $ax killed $ax killed $eax
    345 ; X32-NEXT:    retl
    346 ;
    347 ; X64-LABEL: ctlz_i16_zero_test:
    348 ; X64:       # %bb.0:
    349 ; X64-NEXT:    testw %di, %di
    350 ; X64-NEXT:    je .LBB9_1
    351 ; X64-NEXT:  # %bb.2: # %cond.false
    352 ; X64-NEXT:    bsrw %di, %ax
    353 ; X64-NEXT:    xorl $15, %eax
    354 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
    355 ; X64-NEXT:    retq
    356 ; X64-NEXT:  .LBB9_1:
    357 ; X64-NEXT:    movw $16, %ax
    358 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
    359 ; X64-NEXT:    retq
    360 ;
    361 ; X32-CLZ-LABEL: ctlz_i16_zero_test:
    362 ; X32-CLZ:       # %bb.0:
    363 ; X32-CLZ-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
    364 ; X32-CLZ-NEXT:    retl
    365 ;
    366 ; X64-CLZ-LABEL: ctlz_i16_zero_test:
    367 ; X64-CLZ:       # %bb.0:
    368 ; X64-CLZ-NEXT:    lzcntw %di, %ax
    369 ; X64-CLZ-NEXT:    retq
    370   %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
    371   ret i16 %tmp1
    372 }
    373 
    374 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
    375 define i32 @ctlz_i32_zero_test(i32 %n) {
    376 ; X32-LABEL: ctlz_i32_zero_test:
    377 ; X32:       # %bb.0:
    378 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    379 ; X32-NEXT:    testl %eax, %eax
    380 ; X32-NEXT:    je .LBB10_1
    381 ; X32-NEXT:  # %bb.2: # %cond.false
    382 ; X32-NEXT:    bsrl %eax, %eax
    383 ; X32-NEXT:    xorl $31, %eax
    384 ; X32-NEXT:    retl
    385 ; X32-NEXT:  .LBB10_1:
    386 ; X32-NEXT:    movl    $32, %eax
    387 ; X32-NEXT:    retl
    388 ;
    389 ; X64-LABEL: ctlz_i32_zero_test:
    390 ; X64:       # %bb.0:
    391 ; X64-NEXT:    testl %edi, %edi
    392 ; X64-NEXT:    je .LBB10_1
    393 ; X64-NEXT:  # %bb.2: # %cond.false
    394 ; X64-NEXT:    bsrl %edi, %eax
    395 ; X64-NEXT:    xorl $31, %eax
    396 ; X64-NEXT:    retq
    397 ; X64-NEXT:  .LBB10_1:
    398 ; X64-NEXT:    movl $32, %eax
    399 ; X64-NEXT:    retq
    400 ;
    401 ; X32-CLZ-LABEL: ctlz_i32_zero_test:
    402 ; X32-CLZ:       # %bb.0:
    403 ; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
    404 ; X32-CLZ-NEXT:    retl
    405 ;
    406 ; X64-CLZ-LABEL: ctlz_i32_zero_test:
    407 ; X64-CLZ:       # %bb.0:
    408 ; X64-CLZ-NEXT:    lzcntl %edi, %eax
    409 ; X64-CLZ-NEXT:    retq
    410   %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
    411   ret i32 %tmp1
    412 }
    413 
    414 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; 64-bit ctlz with a defined zero result. The X32 path selects between a
; bsrl of the high half (with $63 as the zero fallback) and a bsrl of the
; low half, then converts index to count with xor $31 (+32 on the low-half
; path). X64 branches to return 64 on zero; the CLZ paths use lzcnt and
; only branch to pick the half on 32-bit.
define i64 @ctlz_i64_zero_test(i64 %n) {
; X32-LABEL: ctlz_i64_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    bsrl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl $63, %eax
; X32-NEXT:    je .LBB11_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    movl %edx, %eax
; X32-NEXT:  .LBB11_2:
; X32-NEXT:    testl %ecx, %ecx
; X32-NEXT:    jne .LBB11_3
; X32-NEXT:  # %bb.4:
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    addl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB11_3:
; X32-NEXT:    bsrl %ecx, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i64_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testq %rdi, %rdi
; X64-NEXT:    je .LBB11_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrq %rdi, %rax
; X64-NEXT:    xorq $63, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB11_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i64_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB11_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    addl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB11_1:
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}
    472 
    473 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
    474 define i8 @cttz_i8_zero_test(i8 %n) {
    475 ; X32-LABEL: cttz_i8_zero_test:
    476 ; X32:       # %bb.0:
    477 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
    478 ; X32-NEXT:    testb %al, %al
    479 ; X32-NEXT:    je .LBB12_1
    480 ; X32-NEXT:  # %bb.2: # %cond.false
    481 ; X32-NEXT:    movzbl %al, %eax
    482 ; X32-NEXT:    bsfl %eax, %eax
    483 ; X32-NEXT:    # kill: def $al killed $al killed $eax
    484 ; X32-NEXT:    retl
    485 ; X32-NEXT:  .LBB12_1
    486 ; X32-NEXT:    movb $8, %al
    487 ; X32-NEXT:    # kill: def $al killed $al killed $eax
    488 ; X32-NEXT:    retl
    489 ;
    490 ; X64-LABEL: cttz_i8_zero_test:
    491 ; X64:       # %bb.0:
    492 ; X64-NEXT:    testb %dil, %dil
    493 ; X64-NEXT:    je .LBB12_1
    494 ; X64-NEXT:  # %bb.2: # %cond.false
    495 ; X64-NEXT:    movzbl %dil, %eax
    496 ; X64-NEXT:    bsfl %eax, %eax
    497 ; X64-NEXT:    # kill: def $al killed $al killed $eax
    498 ; X64-NEXT:    retq
    499 ; X64-NEXT:  .LBB12_1:
    500 ; X64-NEXT:    movb $8, %al
    501 ; X64-NEXT:    # kill: def $al killed $al killed $eax
    502 ; X64-NEXT:    retq
    503 ;
    504 ; X32-CLZ-LABEL: cttz_i8_zero_test:
    505 ; X32-CLZ:       # %bb.0:
    506 ; X32-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
    507 ; X32-CLZ-NEXT:    orl $256, %eax # imm = 0x100
    508 ; X32-CLZ-NEXT:    tzcntl %eax, %eax
    509 ; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
    510 ; X32-CLZ-NEXT:    retl
    511 ;
    512 ; X64-CLZ-LABEL: cttz_i8_zero_test:
    513 ; X64-CLZ:       # %bb.0:
    514 ; X64-CLZ-NEXT:    movzbl %dil, %eax
    515 ; X64-CLZ-NEXT:    orl $256, %eax # imm = 0x100
    516 ; X64-CLZ-NEXT:    tzcntl %eax, %eax
    517 ; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
    518 ; X64-CLZ-NEXT:    retq
    519   %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
    520   ret i8 %tmp1
    521 }
    522 
    523 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
    524 define i16 @cttz_i16_zero_test(i16 %n) {
    525 ; X32-LABEL: cttz_i16_zero_test:
    526 ; X32:       # %bb.0:
    527 ; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    528 ; X32-NEXT:    testw %ax, %ax
    529 ; X32-NEXT:    je .LBB13_1
    530 ; X32-NEXT:  # %bb.2: # %cond.false
    531 ; X32-NEXT:    bsfw %ax, %ax
    532 ; X32-NEXT:    retl
    533 ; X32-NEXT:  .LBB13_1
    534 ; X32-NEXT:    movw $16, %ax
    535 ; X32-NEXT:    retl
    536 ;
    537 ; X64-LABEL: cttz_i16_zero_test:
    538 ; X64:       # %bb.0:
    539 ; X64-NEXT:    testw %di, %di
    540 ; X64-NEXT:    je .LBB13_1
    541 ; X64-NEXT:  # %bb.2: # %cond.false
    542 ; X64-NEXT:    bsfw %di, %ax
    543 ; X64-NEXT:    retq
    544 ; X64-NEXT:  .LBB13_1:
    545 ; X64-NEXT:    movw $16, %ax
    546 ; X64-NEXT:    retq
    547 ;
    548 ; X32-CLZ-LABEL: cttz_i16_zero_test:
    549 ; X32-CLZ:       # %bb.0:
    550 ; X32-CLZ-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
    551 ; X32-CLZ-NEXT:    retl
    552 ;
    553 ; X64-CLZ-LABEL: cttz_i16_zero_test:
    554 ; X64-CLZ:       # %bb.0:
    555 ; X64-CLZ-NEXT:    tzcntw %di, %ax
    556 ; X64-CLZ-NEXT:    retq
    557   %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
    558   ret i16 %tmp1
    559 }
    560 
    561 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
    562 define i32 @cttz_i32_zero_test(i32 %n) {
    563 ; X32-LABEL: cttz_i32_zero_test:
    564 ; X32:       # %bb.0:
    565 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    566 ; X32-NEXT:    testl %eax, %eax
    567 ; X32-NEXT:    je .LBB14_1
    568 ; X32-NEXT:  # %bb.2: # %cond.false
    569 ; X32-NEXT:    bsfl %eax, %eax
    570 ; X32-NEXT:    retl
    571 ; X32-NEXT:  .LBB14_1
    572 ; X32-NEXT:    movl $32, %eax
    573 ; X32-NEXT:    retl
    574 ;
    575 ; X64-LABEL: cttz_i32_zero_test:
    576 ; X64:       # %bb.0:
    577 ; X64-NEXT:    testl %edi, %edi
    578 ; X64-NEXT:    je .LBB14_1
    579 ; X64-NEXT:  # %bb.2: # %cond.false
    580 ; X64-NEXT:    bsfl %edi, %eax
    581 ; X64-NEXT:    retq
    582 ; X64-NEXT:  .LBB14_1:
    583 ; X64-NEXT:    movl $32, %eax
    584 ; X64-NEXT:    retq
    585 ;
    586 ; X32-CLZ-LABEL: cttz_i32_zero_test:
    587 ; X32-CLZ:       # %bb.0:
    588 ; X32-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
    589 ; X32-CLZ-NEXT:    retl
    590 ;
    591 ; X64-CLZ-LABEL: cttz_i32_zero_test:
    592 ; X64-CLZ:       # %bb.0:
    593 ; X64-CLZ-NEXT:    tzcntl %edi, %eax
    594 ; X64-CLZ-NEXT:    retq
    595   %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
    596   ret i32 %tmp1
    597 }
    598 
    599 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; 64-bit cttz with a defined zero result. X32 selects between a bsfl of
; the high half (with $32 as the zero fallback) and a bsfl of the low
; half, adding 32 on the high-half path. X64 branches to return 64 on
; zero; the CLZ paths use tzcnt and only branch to pick the half on
; 32-bit.
define i64 @cttz_i64_zero_test(i64 %n) {
; X32-LABEL: cttz_i64_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    bsfl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl $32, %eax
; X32-NEXT:    je .LBB15_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    movl %edx, %eax
; X32-NEXT:  .LBB15_2:
; X32-NEXT:    testl %ecx, %ecx
; X32-NEXT:    jne .LBB15_3
; X32-NEXT:  # %bb.4:
; X32-NEXT:    addl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB15_3:
; X32-NEXT:    bsfl %ecx, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i64_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testq %rdi, %rdi
; X64-NEXT:    je .LBB15_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsfq %rdi, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB15_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i64_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB15_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    addl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB15_1:
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}
    654 
    655 ; Don't generate the cmovne when the source is known non-zero (and bsr would
    656 ; not set ZF).
    657 ; rdar://9490949
    658 ; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
    659 ;        codegen doesn't know how to delete the movl and je.
    660 define i32 @ctlz_i32_fold_cmov(i32 %n) {
    661 ; X32-LABEL: ctlz_i32_fold_cmov:
    662 ; X32:       # %bb.0:
    663 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    664 ; X32-NEXT:    orl $1, %eax
    665 ; X32-NEXT:    je .LBB16_1
    666 ; X32-NEXT:  # %bb.2: # %cond.false
    667 ; X32-NEXT:    bsrl %eax, %eax
    668 ; X32-NEXT:    xorl $31, %eax
    669 ; X32-NEXT:    retl
    670 ; X32-NEXT:  .LBB16_1
    671 ; X32-NEXT:    movl $32, %eax
    672 ; X32-NEXT:    retl
    673 ;
    674 ; X64-LABEL: ctlz_i32_fold_cmov:
    675 ; X64:       # %bb.0:
    676 ; X64-NEXT:    orl $1, %edi
    677 ; X64-NEXT:    je .LBB16_1
    678 ; X64-NEXT:  # %bb.2: # %cond.false
    679 ; X64-NEXT:    bsrl %edi, %eax
    680 ; X64-NEXT:    xorl $31, %eax
    681 ; X64-NEXT:    retq
    682 ; X64-NEXT:  .LBB16_1:
    683 ; X64-NEXT:    movl $32, %eax
    684 ; X64-NEXT:    retq
    685 ;
    686 ; X32-CLZ-LABEL: ctlz_i32_fold_cmov:
    687 ; X32-CLZ:       # %bb.0:
    688 ; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
    689 ; X32-CLZ-NEXT:    orl $1, %eax
    690 ; X32-CLZ-NEXT:    lzcntl %eax, %eax
    691 ; X32-CLZ-NEXT:    retl
    692 ;
    693 ; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
    694 ; X64-CLZ:       # %bb.0:
    695 ; X64-CLZ-NEXT:    orl $1, %edi
    696 ; X64-CLZ-NEXT:    lzcntl %edi, %eax
    697 ; X64-CLZ-NEXT:    retq
    698   %or = or i32 %n, 1
    699   %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
    700   ret i32 %tmp1
    701 }
    702 
    703 ; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
    704 ; the most significant bit, which is what 'bsr' does natively.
    705 ; FIXME: We should probably select BSR instead of LZCNT in these circumstances.
; ctlz(x) ^ 31 is exactly the bsr bit index, so the bsr paths emit no xor
; at all; the lzcnt paths still pay an extra xor $31 (see FIXME above).
define i32 @ctlz_bsr(i32 %n) {
; X32-LABEL: ctlz_bsr:
; X32:       # %bb.0:
; X32-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_bsr:
; X64:       # %bb.0:
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_bsr:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    xorl $31, %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_bsr:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    xorl $31, %eax
; X64-CLZ-NEXT:    retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}
    732 
    733 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
    734 ; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
    735 ;        codegen doesn't know how to combine the $32 and $31 into $63.
; Same as ctlz_bsr but with a defined zero result. The bsr paths currently
; emit back-to-back xor $31 pairs that don't cancel, and the zero path
; computes 32 ^ 31 at run time instead of folding to 63 (see FIXME above).
; The lzcnt paths need only lzcnt + xor $31.
define i32 @ctlz_bsr_zero_test(i32 %n) {
; X32-LABEL: ctlz_bsr_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    je .LBB18_1
; X32-NEXT:  # %bb.2: # %cond.false
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB18_1:
; X32-NEXT:    movl $32, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_bsr_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    je .LBB18_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB18_1:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_bsr_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    xorl $31, %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_bsr_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    xorl $31, %eax
; X64-CLZ-NEXT:    retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}
    781 
; `or $2` bounds cttz at <= 1, so the trailing `and i8 %tmp, 1` is known
; to be a no-op and no mask instruction appears in any of the checks.
define i8 @cttz_i8_knownbits(i8 %x)  {
; X32-LABEL: cttz_i8_knownbits:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    orb $2, %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i8_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    orb $2, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsfl %eax, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i8_knownbits:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-CLZ-NEXT:    orb $2, %al
; X32-CLZ-NEXT:    movzbl %al, %eax
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8_knownbits:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orb $2, %dil
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    tzcntl %eax, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %x2 = or i8 %x, 2
  %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}
    821 
; `or $64` bounds ctlz at <= 1, so the trailing `and i8 %tmp, 1` is known
; to be a no-op and no mask instruction appears in any of the checks.
define i8 @ctlz_i8_knownbits(i8 %x)  {
; X32-LABEL: ctlz_i8_knownbits:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    orb $64, %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $7, %eax
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i8_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    orb $64, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsrl %eax, %eax
; X64-NEXT:    xorl $7, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i8_knownbits:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-CLZ-NEXT:    orb $64, %al
; X32-CLZ-NEXT:    movzbl %al, %eax
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    addl $-24, %eax
; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i8_knownbits:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orb $64, %dil
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    lzcntl %eax, %eax
; X64-CLZ-NEXT:    addl $-24, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq

  %x2 = or i8 %x, 64
  %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}
    866 
    867 ; Make sure we can detect that the input is non-zero and avoid cmov after BSR
    868 ; This is relevant for 32-bit mode without lzcnt
define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
; X32-LABEL: ctlz_i64_zero_test_knownneverzero:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    jne .LBB21_1
; X32-NEXT:  # %bb.2:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    orl $1, %eax
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    orl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB21_1:
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64:       # %bb.0:
; X64-NEXT:    orq $1, %rdi
; X64-NEXT:    je .LBB21_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrq %rdi, %rax
; X64-NEXT:    xorq $63, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB21_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB21_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    orl $1, %eax
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    orl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB21_1:
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orq $1, %rdi
; X64-CLZ-NEXT:    lzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  ; Setting bit 0 guarantees %o != 0, so even with is_zero_poison = false
  ; the cmov/branch normally emitted after BSR can be dropped.  The X32
  ; paths still branch on the high dword because each 32-bit half may be
  ; zero individually.
  ; NOTE(review): the X64 CHECK block above retains a 'je' after
  ; 'orq $1, %rdi', which can never be taken -- this documents current
  ; (suboptimal) codegen, not intended behavior.
  %o = or i64 %n, 1
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}
    927 
    928 ; Make sure we can detect that the input is non-zero and avoid cmov after BSF
    929 ; This is relevant for 32-bit mode without tzcnt
define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
; X32-LABEL: cttz_i64_zero_test_knownneverzero:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    jne .LBB22_1
; X32-NEXT:  # %bb.2:
; X32-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    orl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB22_1:
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i64_zero_test_knownneverzero:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    je .LBB22_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsfq %rax, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB22_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB22_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X32-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    orl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB22_1:
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-CLZ-NEXT:    orq %rdi, %rax
; X64-CLZ-NEXT:    tzcntq %rax, %rax
; X64-CLZ-NEXT:    retq
  ; Setting the sign bit guarantees %o != 0, so even with
  ; is_zero_poison = false the cmov/branch normally emitted after BSF can
  ; be dropped.  The X32 paths still branch on the low dword because each
  ; 32-bit half may be zero individually.
  ; NOTE(review): the X64 CHECK block above retains a 'je' after oring in
  ; the sign bit, which can never be taken -- this documents current
  ; (suboptimal) codegen, not intended behavior.
  %o = or i64 %n, -9223372036854775808 ; 0x8000000000000000
  %tmp1 = call i64 @llvm.cttz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}
    987