; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

; Lowering tests for the count-trailing-zeros and count-leading-zeros
; intrinsics at i8/i16/i32/i64. The expected assembly below is built around
; the bsf/bsr bit-scan instructions.
;
; The trailing i1 operand of each intrinsic selects whether a zero input has
; an undefined result (true) or must yield the bit width (false).

declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)

;-----------------------------------------------------------------------------
; cttz, zero input undefined: a single bsf is expected. The i8 variant is
; zero-extended first and scanned with the 32-bit form.
;-----------------------------------------------------------------------------

define i8 @cttz_i8(i8 %x) {
; CHECK-LABEL: cttz_i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsfl %eax, %eax
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %count = call i8 @llvm.cttz.i8(i8 %x, i1 true)
  ret i8 %count
}

define i16 @cttz_i16(i16 %x) {
; CHECK-LABEL: cttz_i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsfw %di, %ax
; CHECK-NEXT:    retq
  %count = call i16 @llvm.cttz.i16(i16 %x, i1 true)
  ret i16 %count
}

define i32 @cttz_i32(i32 %x) {
; CHECK-LABEL: cttz_i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsfl %edi, %eax
; CHECK-NEXT:    retq
  %count = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  ret i32 %count
}

define i64 @cttz_i64(i64 %x) {
; CHECK-LABEL: cttz_i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsfq %rdi, %rax
; CHECK-NEXT:    retq
  %count = call i64 @llvm.cttz.i64(i64 %x, i1 true)
  ret i64 %count
}

;-----------------------------------------------------------------------------
; ctlz, zero input undefined: bsr yields the index of the highest set bit, so
; the expected code xors it with (bit width - 1) to turn that index into a
; leading-zero count.
;-----------------------------------------------------------------------------

define i8 @ctlz_i8(i8 %x) {
; CHECK-LABEL: ctlz_i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsrl %eax, %eax
; CHECK-NEXT:    xorl $7, %eax
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %count = call i8 @llvm.ctlz.i8(i8 %x, i1 true)
  ret i8 %count
}

define i16 @ctlz_i16(i16 %x) {
; CHECK-LABEL: ctlz_i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrw %di, %ax
; CHECK-NEXT:    xorl $15, %eax
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %count = call i16 @llvm.ctlz.i16(i16 %x, i1 true)
  ret i16 %count
}

define i32 @ctlz_i32(i32 %x) {
; CHECK-LABEL: ctlz_i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:    retq
  %count = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
  ret i32 %count
}

define i64 @ctlz_i64(i64 %x) {
; CHECK-LABEL: ctlz_i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrq %rdi, %rax
; CHECK-NEXT:    xorq $63, %rax
; CHECK-NEXT:    retq
  %count = call i64 @llvm.ctlz.i64(i64 %x, i1 true)
  ret i64 %count
}

;-----------------------------------------------------------------------------
; ctlz, zero input defined: because bsr/bsf are very slow, a zero input is
; expected to be handled by an explicit test and branch that skips the bit
; scan and returns the preloaded bit width.
;-----------------------------------------------------------------------------

define i8 @ctlz_i8_zero_test(i8 %n) {
; CHECK-LABEL: ctlz_i8_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movb $8, %al
; CHECK-NEXT:    testb %dil, %dil
; CHECK-NEXT:    je .LBB8_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsrl %eax, %eax
; CHECK-NEXT:    xorl $7, %eax
; CHECK-NEXT:  .LBB8_2: # %cond.end
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %count = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
  ret i8 %count
}

define i16 @ctlz_i16_zero_test(i16 %n) {
; CHECK-LABEL: ctlz_i16_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movw $16, %ax
; CHECK-NEXT:    testw %di, %di
; CHECK-NEXT:    je .LBB9_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrw %di, %ax
; CHECK-NEXT:    xorl $15, %eax
; CHECK-NEXT:  .LBB9_2: # %cond.end
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %count = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
  ret i16 %count
}

define i32 @ctlz_i32_zero_test(i32 %n) {
; CHECK-LABEL: ctlz_i32_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    je .LBB10_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:  .LBB10_2: # %cond.end
; CHECK-NEXT:    retq
  %count = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  ret i32 %count
}

define i64 @ctlz_i64_zero_test(i64 %n) {
; CHECK-LABEL: ctlz_i64_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $64, %eax
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    je .LBB11_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrq %rdi, %rax
; CHECK-NEXT:    xorq $63, %rax
; CHECK-NEXT:  .LBB11_2: # %cond.end
; CHECK-NEXT:    retq
  %count = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
  ret i64 %count
}

;-----------------------------------------------------------------------------
; cttz, zero input defined: same test-and-branch shape as the ctlz variants
; above (bsr/bsf are very slow), but no xor is needed after the bsf.
;-----------------------------------------------------------------------------

define i8 @cttz_i8_zero_test(i8 %n) {
; CHECK-LABEL: cttz_i8_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movb $8, %al
; CHECK-NEXT:    testb %dil, %dil
; CHECK-NEXT:    je .LBB12_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsfl %eax, %eax
; CHECK-NEXT:  .LBB12_2: # %cond.end
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %count = call i8 @llvm.cttz.i8(i8 %n, i1 false)
  ret i8 %count
}

define i16 @cttz_i16_zero_test(i16 %n) {
; CHECK-LABEL: cttz_i16_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movw $16, %ax
; CHECK-NEXT:    testw %di, %di
; CHECK-NEXT:    je .LBB13_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsfw %di, %ax
; CHECK-NEXT:  .LBB13_2: # %cond.end
; CHECK-NEXT:    retq
  %count = call i16 @llvm.cttz.i16(i16 %n, i1 false)
  ret i16 %count
}

define i32 @cttz_i32_zero_test(i32 %n) {
; CHECK-LABEL: cttz_i32_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    je .LBB14_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsfl %edi, %eax
; CHECK-NEXT:  .LBB14_2: # %cond.end
; CHECK-NEXT:    retq
  %count = call i32 @llvm.cttz.i32(i32 %n, i1 false)
  ret i32 %count
}

define i64 @cttz_i64_zero_test(i64 %n) {
; CHECK-LABEL: cttz_i64_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $64, %eax
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    je .LBB15_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsfq %rdi, %rax
; CHECK-NEXT:  .LBB15_2: # %cond.end
; CHECK-NEXT:    retq
  %count = call i64 @llvm.cttz.i64(i64 %n, i1 false)
  ret i64 %count
}

;-----------------------------------------------------------------------------
; Special cases.
;-----------------------------------------------------------------------------

; The operand is or'ed with 1, so it is known to be non-zero (and bsr would
; not set ZF); no cmovne should be generated for it.
; rdar://9490949
; FIXME: CodeGenPrepare introduces the compare and branch late in IR, and
; codegen does not know how to delete the resulting movl and je.
define i32 @ctlz_i32_fold_cmov(i32 %n) {
; CHECK-LABEL: ctlz_i32_fold_cmov:
; CHECK:       # BB#0:
; CHECK-NEXT:    orl $1, %edi
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    je .LBB16_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:  .LBB16_2: # %cond.end
; CHECK-NEXT:    retq
  %nonzero = or i32 %n, 1
  %count = call i32 @llvm.ctlz.i32(i32 %nonzero, i1 false)
  ret i32 %count
}

; Here 'ctlz' followed by 'xor 31' computes the index of the most significant
; set bit, which is what 'bsr' produces natively, so no xor instructions
; should be generated.
define i32 @ctlz_bsr(i32 %n) {
; CHECK-LABEL: ctlz_bsr:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    retq
  %lz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
  %msb = xor i32 %lz, 31
  ret i32 %msb
}

; Same pattern as ctlz_bsr, but a zero input is defined, so the test and
; branch around the bit scan is expected (bsr/bsf are very slow).
; FIXME: CodeGenPrepare introduces the compare and branch late in IR, and
; codegen does not know how to combine the $32 and $31 into $63.
define i32 @ctlz_bsr_zero_test(i32 %n) {
; CHECK-LABEL: ctlz_bsr_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    je .LBB18_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:  .LBB18_2: # %cond.end
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:    retq
  %lz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  %msb = xor i32 %lz, 31
  ret i32 %msb
}