Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
      2 
      3 declare i8 @llvm.cttz.i8(i8, i1)
      4 declare i16 @llvm.cttz.i16(i16, i1)
      5 declare i32 @llvm.cttz.i32(i32, i1)
      6 declare i64 @llvm.cttz.i64(i64, i1)
      7 declare i8 @llvm.ctlz.i8(i8, i1)
      8 declare i16 @llvm.ctlz.i16(i16, i1)
      9 declare i32 @llvm.ctlz.i32(i32, i1)
     10 declare i64 @llvm.ctlz.i64(i64, i1)
     11 
     12 define i8 @cttz_i8(i8 %x)  {
        ; Returns llvm.cttz.i8(%x) with the i1 flag set to true.
        ; Expected codegen: zero-extend the byte argument into %eax, then a
        ; 32-bit bsf; no zero-input test or branch is expected.
     13 ; CHECK-LABEL: cttz_i8:
     14 ; CHECK:       # BB#0:
     15 ; CHECK-NEXT:    movzbl %dil, %eax
     16 ; CHECK-NEXT:    bsfl %eax, %eax
     17 ; CHECK-NEXT:    # kill
     18 ; CHECK-NEXT:    retq
     19   %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
     20   ret i8 %tmp
     21 }
     22 
     23 define i16 @cttz_i16(i16 %x)  {
        ; Returns llvm.cttz.i16(%x) with the i1 flag set to true.
        ; Expected codegen: a single 16-bit bsf from %di into %ax, then return.
     24 ; CHECK-LABEL: cttz_i16:
     25 ; CHECK:       # BB#0:
     26 ; CHECK-NEXT:    bsfw %di, %ax
     27 ; CHECK-NEXT:    retq
     28   %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
     29   ret i16 %tmp
     30 }
     31 
     32 define i32 @cttz_i32(i32 %x)  {
        ; Returns llvm.cttz.i32(%x) with the i1 flag set to true.
        ; Expected codegen: a single 32-bit bsf from %edi into %eax, then return.
     33 ; CHECK-LABEL: cttz_i32:
     34 ; CHECK:       # BB#0:
     35 ; CHECK-NEXT:    bsfl %edi, %eax
     36 ; CHECK-NEXT:    retq
     37   %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
     38   ret i32 %tmp
     39 }
     40 
     41 define i64 @cttz_i64(i64 %x)  {
        ; Returns llvm.cttz.i64(%x) with the i1 flag set to true.
        ; Expected codegen: a single 64-bit bsf from %rdi into %rax, then return.
     42 ; CHECK-LABEL: cttz_i64:
     43 ; CHECK:       # BB#0:
     44 ; CHECK-NEXT:    bsfq %rdi, %rax
     45 ; CHECK-NEXT:    retq
     46   %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
     47   ret i64 %tmp
     48 }
     49 
     50 define i8 @ctlz_i8(i8 %x) {
        ; Returns llvm.ctlz.i8(%x) with the i1 flag set to true.
        ; Expected codegen: zero-extend the byte argument, take a 32-bit bsr,
        ; then xor with 7 to turn the bit index into a leading-zero count.
        ; No zero-input test or branch is expected.
     51 ; CHECK-LABEL: ctlz_i8:
     52 ; CHECK:       # BB#0:
     53 ; CHECK-NEXT:    movzbl %dil, %eax
     54 ; CHECK-NEXT:    bsrl %eax, %eax
     55 ; CHECK-NEXT:    xorl $7, %eax
     56 ; CHECK-NEXT:    # kill
     57 ; CHECK-NEXT:    retq
     58   %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
     59   ret i8 %tmp2
     60 }
     61 
     62 define i16 @ctlz_i16(i16 %x) {
        ; Returns llvm.ctlz.i16(%x) with the i1 flag set to true.
        ; Expected codegen: a 16-bit bsr from %di into %ax, followed by a
        ; 32-bit xor with 15 to turn the bit index into a leading-zero count.
     63 ; CHECK-LABEL: ctlz_i16:
     64 ; CHECK:       # BB#0:
     65 ; CHECK-NEXT:    bsrw %di, %ax
     66 ; CHECK-NEXT:    xorl $15, %eax
     67 ; CHECK-NEXT:    # kill
     68 ; CHECK-NEXT:    retq
     69   %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
     70   ret i16 %tmp2
     71 }
     72 
     73 define i32 @ctlz_i32(i32 %x) {
        ; Returns llvm.ctlz.i32(%x) with the i1 flag set to true.
        ; Expected codegen: a 32-bit bsr followed by xor with 31 to turn the
        ; bit index into a leading-zero count; no zero-input branch.
     74 ; CHECK-LABEL: ctlz_i32:
     75 ; CHECK:       # BB#0:
     76 ; CHECK-NEXT:    bsrl %edi, %eax
     77 ; CHECK-NEXT:    xorl $31, %eax
     78 ; CHECK-NEXT:    retq
     79   %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
     80   ret i32 %tmp
     81 }
     82 
     83 define i64 @ctlz_i64(i64 %x) {
        ; Returns llvm.ctlz.i64(%x) with the i1 flag set to true.
        ; Expected codegen: a 64-bit bsr followed by xor with 63 to turn the
        ; bit index into a leading-zero count; no zero-input branch.
     84 ; CHECK-LABEL: ctlz_i64:
     85 ; CHECK:       # BB#0:
     86 ; CHECK-NEXT:    bsrq %rdi, %rax
     87 ; CHECK-NEXT:    xorq $63, %rax
     88 ; CHECK-NEXT:    retq
     89   %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
     90   ret i64 %tmp
     91 }
     92 
     93 define i8 @ctlz_i8_zero_test(i8 %n) {
     94 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
        ; Same intrinsic as ctlz_i8, but the i1 flag is false. Expected codegen
        ; preloads 8 into %al, tests the input byte, and jumps to %cond.end when
        ; it is zero, skipping the movzbl/bsrl/xorl sequence in %cond.false.
     95 
     96 ; CHECK-LABEL: ctlz_i8_zero_test:
     97 ; CHECK:       # BB#0:
     98 ; CHECK-NEXT:    movb $8, %al
     99 ; CHECK-NEXT:    testb %dil, %dil
    100 ; CHECK-NEXT:    je .LBB8_2
    101 ; CHECK-NEXT:  # BB#1: # %cond.false
    102 ; CHECK-NEXT:    movzbl %dil, %eax
    103 ; CHECK-NEXT:    bsrl %eax, %eax
    104 ; CHECK-NEXT:    xorl $7, %eax
    105 ; CHECK-NEXT:  .LBB8_2: # %cond.end
    106 ; CHECK-NEXT:    # kill
    107 ; CHECK-NEXT:    retq
    108   %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
    109   ret i8 %tmp1
    110 }
    111 
    112 define i16 @ctlz_i16_zero_test(i16 %n) {
    113 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
        ; Same intrinsic as ctlz_i16, but the i1 flag is false. Expected codegen
        ; preloads 16 into %ax, tests the input, and jumps to %cond.end when it
        ; is zero, skipping the bsrw/xorl sequence in %cond.false.
    114 
    115 ; CHECK-LABEL: ctlz_i16_zero_test:
    116 ; CHECK:       # BB#0:
    117 ; CHECK-NEXT:    movw $16, %ax
    118 ; CHECK-NEXT:    testw %di, %di
    119 ; CHECK-NEXT:    je .LBB9_2
    120 ; CHECK-NEXT:  # BB#1: # %cond.false
    121 ; CHECK-NEXT:    bsrw %di, %ax
    122 ; CHECK-NEXT:    xorl $15, %eax
    123 ; CHECK-NEXT:  .LBB9_2: # %cond.end
    124 ; CHECK-NEXT:    # kill
    125 ; CHECK-NEXT:    retq
    126   %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
    127   ret i16 %tmp1
    128 }
    129 
    130 define i32 @ctlz_i32_zero_test(i32 %n) {
    131 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
        ; Same intrinsic as ctlz_i32, but the i1 flag is false. Expected codegen
        ; preloads 32 into %eax, tests the input, and jumps to %cond.end when it
        ; is zero, skipping the bsrl/xorl sequence in %cond.false.
    132 
    133 ; CHECK-LABEL: ctlz_i32_zero_test:
    134 ; CHECK:       # BB#0:
    135 ; CHECK-NEXT:    movl $32, %eax
    136 ; CHECK-NEXT:    testl %edi, %edi
    137 ; CHECK-NEXT:    je .LBB10_2
    138 ; CHECK-NEXT:  # BB#1: # %cond.false
    139 ; CHECK-NEXT:    bsrl %edi, %eax
    140 ; CHECK-NEXT:    xorl $31, %eax
    141 ; CHECK-NEXT:  .LBB10_2: # %cond.end
    142 ; CHECK-NEXT:    retq
    143   %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
    144   ret i32 %tmp1
    145 }
    146 
    147 define i64 @ctlz_i64_zero_test(i64 %n) {
    148 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
        ; Same intrinsic as ctlz_i64, but the i1 flag is false. Expected codegen
        ; preloads 64 with a 32-bit movl into %eax, tests the 64-bit input, and
        ; jumps to %cond.end when it is zero, skipping the bsrq/xorq sequence.
    149 
    150 ; CHECK-LABEL: ctlz_i64_zero_test:
    151 ; CHECK:       # BB#0:
    152 ; CHECK-NEXT:    movl $64, %eax
    153 ; CHECK-NEXT:    testq %rdi, %rdi
    154 ; CHECK-NEXT:    je .LBB11_2
    155 ; CHECK-NEXT:  # BB#1: # %cond.false
    156 ; CHECK-NEXT:    bsrq %rdi, %rax
    157 ; CHECK-NEXT:    xorq $63, %rax
    158 ; CHECK-NEXT:  .LBB11_2: # %cond.end
    159 ; CHECK-NEXT:    retq
    160   %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
    161   ret i64 %tmp1
    162 }
    163 
    164 define i8 @cttz_i8_zero_test(i8 %n) {
    165 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
        ; Same intrinsic as cttz_i8, but the i1 flag is false. Expected codegen
        ; preloads 8 into %al, tests the input byte, and jumps to %cond.end when
        ; it is zero, skipping the movzbl/bsfl sequence in %cond.false.
    166 
    167 ; CHECK-LABEL: cttz_i8_zero_test:
    168 ; CHECK:       # BB#0:
    169 ; CHECK-NEXT:    movb $8, %al
    170 ; CHECK-NEXT:    testb %dil, %dil
    171 ; CHECK-NEXT:    je .LBB12_2
    172 ; CHECK-NEXT:  # BB#1: # %cond.false
    173 ; CHECK-NEXT:    movzbl %dil, %eax
    174 ; CHECK-NEXT:    bsfl %eax, %eax
    175 ; CHECK-NEXT:  .LBB12_2: # %cond.end
    176 ; CHECK-NEXT:    # kill
    177 ; CHECK-NEXT:    retq
    178   %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
    179   ret i8 %tmp1
    180 }
    181 
    182 define i16 @cttz_i16_zero_test(i16 %n) {
    183 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
        ; Same intrinsic as cttz_i16, but the i1 flag is false. Expected codegen
        ; preloads 16 into %ax, tests the input, and jumps to %cond.end when it
        ; is zero, skipping the bsfw in %cond.false.
    184 
    185 ; CHECK-LABEL: cttz_i16_zero_test:
    186 ; CHECK:       # BB#0:
    187 ; CHECK-NEXT:    movw $16, %ax
    188 ; CHECK-NEXT:    testw %di, %di
    189 ; CHECK-NEXT:    je .LBB13_2
    190 ; CHECK-NEXT:  # BB#1: # %cond.false
    191 ; CHECK-NEXT:    bsfw %di, %ax
    192 ; CHECK-NEXT:  .LBB13_2: # %cond.end
    193 ; CHECK-NEXT:    retq
    194   %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
    195   ret i16 %tmp1
    196 }
    197 
    198 define i32 @cttz_i32_zero_test(i32 %n) {
    199 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
        ; Same intrinsic as cttz_i32, but the i1 flag is false. Expected codegen
        ; preloads 32 into %eax, tests the input, and jumps to %cond.end when it
        ; is zero, skipping the bsfl in %cond.false.
    200 
    201 ; CHECK-LABEL: cttz_i32_zero_test:
    202 ; CHECK:       # BB#0:
    203 ; CHECK-NEXT:    movl $32, %eax
    204 ; CHECK-NEXT:    testl %edi, %edi
    205 ; CHECK-NEXT:    je .LBB14_2
    206 ; CHECK-NEXT:  # BB#1: # %cond.false
    207 ; CHECK-NEXT:    bsfl %edi, %eax
    208 ; CHECK-NEXT:  .LBB14_2: # %cond.end
    209 ; CHECK-NEXT:    retq
    210   %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
    211   ret i32 %tmp1
    212 }
    213 
    214 define i64 @cttz_i64_zero_test(i64 %n) {
    215 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
        ; Same intrinsic as cttz_i64, but the i1 flag is false. Expected codegen
        ; preloads 64 with a 32-bit movl into %eax, tests the 64-bit input, and
        ; jumps to %cond.end when it is zero, skipping the bsfq in %cond.false.
    216 
    217 ; CHECK-LABEL: cttz_i64_zero_test:
    218 ; CHECK:       # BB#0:
    219 ; CHECK-NEXT:    movl $64, %eax
    220 ; CHECK-NEXT:    testq %rdi, %rdi
    221 ; CHECK-NEXT:    je .LBB15_2
    222 ; CHECK-NEXT:  # BB#1: # %cond.false
    223 ; CHECK-NEXT:    bsfq %rdi, %rax
    224 ; CHECK-NEXT:  .LBB15_2: # %cond.end
    225 ; CHECK-NEXT:    retq
    226   %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
    227   ret i64 %tmp1
    228 }
    229 
    230 define i32 @ctlz_i32_fold_cmov(i32 %n) {
    231 ; Don't generate the cmovne when the source is known non-zero (and bsr would
    232 ; not set ZF).
    233 ; rdar://9490949
    234 ; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
    235 ;        codegen doesn't know how to delete the movl and je.
        ; The IR ORs the argument with 1 before calling llvm.ctlz.i32 with the
        ; i1 flag false, so the intrinsic's operand always has bit 0 set.
        ; The checks below pin the current (suboptimal) output described by the
        ; FIXME: the `movl $32` and the `je` are still expected, and the `je`
        ; directly follows the `orl`/`movl` pair with no separate test instruction.
    236 
    237 ; CHECK-LABEL: ctlz_i32_fold_cmov:
    238 ; CHECK:       # BB#0:
    239 ; CHECK-NEXT:    orl $1, %edi
    240 ; CHECK-NEXT:    movl $32, %eax
    241 ; CHECK-NEXT:    je .LBB16_2
    242 ; CHECK-NEXT:  # BB#1: # %cond.false
    243 ; CHECK-NEXT:    bsrl %edi, %eax
    244 ; CHECK-NEXT:    xorl $31, %eax
    245 ; CHECK-NEXT:  .LBB16_2: # %cond.end
    246 ; CHECK-NEXT:    retq
    247   %or = or i32 %n, 1
    248   %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
    249   ret i32 %tmp1
    250 }
    251 
    252 define i32 @ctlz_bsr(i32 %n) {
    253 ; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
    254 ; the most significant bit, which is what 'bsr' does natively.
        ; The IR calls llvm.ctlz.i32 with the i1 flag true and then XORs the
        ; result with 31. The checks expect that xor to cancel against the one
        ; the ctlz lowering would add, leaving a bare bsrl followed by retq.
    255 
    256 ; CHECK-LABEL: ctlz_bsr:
    257 ; CHECK:       # BB#0:
    258 ; CHECK-NEXT:    bsrl %edi, %eax
    259 ; CHECK-NEXT:    retq
    260   %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
    261   %bsr = xor i32 %ctlz, 31
    262   ret i32 %bsr
    263 }
    264 
    265 define i32 @ctlz_bsr_zero_test(i32 %n) {
    266 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
    267 ; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
    268 ;        codegen doesn't know how to combine the $32 and $31 into $63.
        ; Same IR shape as ctlz_bsr, but the i1 flag is false. The checks pin
        ; the current output described by the FIXME: the zero-test branch
        ; sequence with its own `xorl $31` inside %cond.false, followed by a
        ; second `xorl $31` after %cond.end that applies to both paths.
    269 
    270 ; CHECK-LABEL: ctlz_bsr_zero_test:
    271 ; CHECK:       # BB#0:
    272 ; CHECK-NEXT:    movl $32, %eax
    273 ; CHECK-NEXT:    testl %edi, %edi
    274 ; CHECK-NEXT:    je .LBB18_2
    275 ; CHECK-NEXT:  # BB#1: # %cond.false
    276 ; CHECK-NEXT:    bsrl %edi, %eax
    277 ; CHECK-NEXT:    xorl $31, %eax
    278 ; CHECK-NEXT:  .LBB18_2: # %cond.end
    279 ; CHECK-NEXT:    xorl $31, %eax
    280 ; CHECK-NEXT:    retq
    281   %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
    282   %bsr = xor i32 %ctlz, 31
    283   ret i32 %bsr
    284 }
    285