; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK0,X86-FALLBACK0
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK1,X86-FALLBACK1
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK2,X86-FALLBACK2
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2,FALLBACK3,X86-FALLBACK3
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2,FALLBACK4,X86-FALLBACK4
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK0,X64-FALLBACK0
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK1,X64-FALLBACK1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK2,X64-FALLBACK2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2,FALLBACK3,X64-FALLBACK3
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2,FALLBACK4,X64-FALLBACK4

; Patterns:
;    c) x & (-1 << y)
;   ic) x & (-1 << (32 - y))
;    d) x >> y << y
;   id) x >> (32 - y) << (32 - y)
; Pattern c is equivalent to pattern d, and pattern ic is equivalent to
; pattern id (mask form vs. shift-pair form). The constant 32 stands for the
; bit width of x; for example the i8 tests use 8 - y and the i16 tests use
; 16 - y.
; The forms are equivalent, but we prefer the second (shift-pair) variant if
; we have BMI2.

; ---------------------------------------------------------------------------- ;
; Pattern c.
; ---------------------------------------------------------------------------- ;

; Every function in this section computes %val & (-1 << %numlowbits) at one
; bit width. The name suffix identifies the variation being exercised:
;   c0                - value and shift amount are plain arguments of one type
;   c1_indexzext      - the shift amount is an i8 argument, zero-extended
;   c2_load           - the value is loaded from memory through %w
;   c3_load_indexzext - loaded value and zero-extended i8 shift amount
;   c4_commutative    - same as c0 with the 'and' operands swapped
; The assertion lines inside each function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.

; 8-bit

define i8 @clear_lowbits8_c0(i8 %val, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_c0:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: clear_lowbits8_c0:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: shrb %cl, %dil
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shlb %cl, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %mask = shl i8 -1, %numlowbits
  %masked = and i8 %mask, %val
  ret i8 %masked
}

define i8 @clear_lowbits8_c2_load(i8* %w, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_c2_load:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb (%eax), %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: clear_lowbits8_c2_load:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: shrb %cl, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shlb %cl, %al
; X64-NEXT: retq
  %val = load i8, i8* %w
  %mask = shl i8 -1, %numlowbits
  %masked = and i8 %mask, %val
  ret i8 %masked
}

define i8 @clear_lowbits8_c4_commutative(i8 %val, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_c4_commutative:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: clear_lowbits8_c4_commutative:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: shrb %cl, %dil
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shlb %cl, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %mask = shl i8 -1, %numlowbits
  %masked = and i8 %val, %mask ; swapped order
  ret i8 %masked
}

; 16-bit

define i16 @clear_lowbits16_c0(i16 %val, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_c0:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_c0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_c0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movzwl %di, %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_c0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl %di, %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %mask = shl i16 -1, %numlowbits
  %masked = and i16 %mask, %val
  ret i16 %masked
}

define i16 @clear_lowbits16_c1_indexzext(i16 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_c1_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_c1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_c1_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movzwl %di, %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_c1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl %di, %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %sh_prom = zext i8 %numlowbits to i16
  %mask = shl i16 -1, %sh_prom
  %masked = and i16 %mask, %val
  ret i16 %masked
}

define i16 @clear_lowbits16_c2_load(i16* %w, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_c2_load:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_c2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzwl (%ecx), %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_c2_load:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_c2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %val = load i16, i16* %w
  %mask = shl i16 -1, %numlowbits
  %masked = and i16 %mask, %val
  ret i16 %masked
}

define i16 @clear_lowbits16_c3_load_indexzext(i16* %w, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_c3_load_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_c3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzwl (%ecx), %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_c3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_c3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %val = load i16, i16* %w
  %sh_prom = zext i8 %numlowbits to i16
  %mask = shl i16 -1, %sh_prom
  %masked = and i16 %mask, %val
  ret i16 %masked
}

define i16 @clear_lowbits16_c4_commutative(i16 %val, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_c4_commutative:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_c4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_c4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movzwl %di, %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_c4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl %di, %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %mask = shl i16 -1, %numlowbits
  %masked = and i16 %val, %mask ; swapped order
  ret i16 %masked
}

; 32-bit

define i32 @clear_lowbits32_c0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_c0:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_c0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits32_c0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_c0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
  %mask = shl i32 -1, %numlowbits
  %masked = and i32 %mask, %val
  ret i32 %masked
}

define i32 @clear_lowbits32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_c1_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_c1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits32_c1_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_c1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
  %sh_prom = zext i8 %numlowbits to i32
  %mask = shl i32 -1, %sh_prom
  %masked = and i32 %mask, %val
  ret i32 %masked
}

define i32 @clear_lowbits32_c2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_c2_load:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_c2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits32_c2_load:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl (%rdi), %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_c2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
  %val = load i32, i32* %w
  %mask = shl i32 -1, %numlowbits
  %masked = and i32 %mask, %val
  ret i32 %masked
}

define i32 @clear_lowbits32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_c3_load_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_c3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits32_c3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl (%rdi), %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_c3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
  %val = load i32, i32* %w
  %sh_prom = zext i8 %numlowbits to i32
  %mask = shl i32 -1, %sh_prom
  %masked = and i32 %mask, %val
  ret i32 %masked
}

define i32 @clear_lowbits32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_c4_commutative:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_c4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits32_c4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_c4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
  %mask = shl i32 -1, %numlowbits
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; 64-bit

define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_c0:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB13_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB13_2:
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB13_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB13_2:
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: shrq %cl, %rdi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_c0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
  %mask = shl i64 -1, %numlowbits
  %masked = and i64 %mask, %val
  ret i64 %masked
}

define i64 @clear_lowbits64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_c1_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB14_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB14_2:
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB14_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB14_2:
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c1_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: shrq %cl, %rdi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_c1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
  %sh_prom = zext i8 %numlowbits to i64
  %mask = shl i64 -1, %sh_prom
  %masked = and i64 %mask, %val
  ret i64 %masked
}

define i64 @clear_lowbits64_c2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_c2_load:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB15_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB15_2:
; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB15_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB15_2:
; X86-BMI2-NEXT: andl 4(%esi), %edx
; X86-BMI2-NEXT: andl (%esi), %eax
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c2_load:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq (%rdi), %rax
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_c2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
  %val = load i64, i64* %w
  %mask = shl i64 -1, %numlowbits
  %masked = and i64 %mask, %val
  ret i64 %masked
}

define i64 @clear_lowbits64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_c3_load_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB16_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB16_2:
; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB16_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB16_2:
; X86-BMI2-NEXT: andl 4(%esi), %edx
; X86-BMI2-NEXT: andl (%esi), %eax
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movq (%rdi), %rax
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_c3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
  %val = load i64, i64* %w
  %sh_prom = zext i8 %numlowbits to i64
  %mask = shl i64 -1, %sh_prom
  %masked = and i64 %mask, %val
  ret i64 %masked
}

define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_c4_commutative:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB17_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB17_2:
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB17_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB17_2:
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: shrq %cl, %rdi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_c4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
  %mask = shl i64 -1, %numlowbits
  %masked = and i64 %val, %mask ; swapped order
  ret i64 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern ic.
; ---------------------------------------------------------------------------- ;

; Every function in this section computes
;   %val & (-1 << (bitwidth - %numlowbits))
; where bitwidth is the width of %val (8 for the i8 tests, 16 for the i16
; tests). The name suffix identifies the variation being exercised:
;   ic0                - value and count are plain arguments of one type
;   ic1_indexzext      - the count is an i8 argument; the subtraction is done
;                        in i8 and its result is zero-extended
;   ic2_load           - the value is loaded from memory through %w
;   ic3_load_indexzext - loaded value and zero-extended i8 shift amount
;   ic4_commutative    - same as ic0 with the 'and' operands swapped
; The assertion lines inside each function are autogenerated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.

; 8-bit

define i8 @clear_lowbits8_ic0(i8 %val, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_ic0:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movb $8, %cl
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: clear_lowbits8_ic0:
; X64: # %bb.0:
; X64-NEXT: movb $8, %cl
; X64-NEXT: subb %sil, %cl
; X64-NEXT: shrb %cl, %dil
; X64-NEXT: shlb %cl, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %numhighbits = sub i8 8, %numlowbits
  %mask = shl i8 -1, %numhighbits
  %masked = and i8 %mask, %val
  ret i8 %masked
}

define i8 @clear_lowbits8_ic2_load(i8* %w, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_ic2_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb (%eax), %al
; X86-NEXT: movb $8, %cl
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: clear_lowbits8_ic2_load:
; X64: # %bb.0:
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: movb $8, %cl
; X64-NEXT: subb %sil, %cl
; X64-NEXT: shrb %cl, %al
; X64-NEXT: shlb %cl, %al
; X64-NEXT: retq
  %val = load i8, i8* %w
  %numhighbits = sub i8 8, %numlowbits
  %mask = shl i8 -1, %numhighbits
  %masked = and i8 %mask, %val
  ret i8 %masked
}

define i8 @clear_lowbits8_ic4_commutative(i8 %val, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_ic4_commutative:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movb $8, %cl
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: clear_lowbits8_ic4_commutative:
; X64: # %bb.0:
; X64-NEXT: movb $8, %cl
; X64-NEXT: subb %sil, %cl
; X64-NEXT: shrb %cl, %dil
; X64-NEXT: shlb %cl, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %numhighbits = sub i8 8, %numlowbits
  %mask = shl i8 -1, %numhighbits
  %masked = and i8 %val, %mask ; swapped order
  ret i8 %masked
}

; 16-bit

define i16 @clear_lowbits16_ic0(i16 %val, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_ic0:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movw $16, %cx
; X86-NOBMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_ic0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movw $16, %cx
; X86-BMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_ic0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl %di, %eax
; X64-NOBMI2-NEXT: movl $16, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_ic0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl %di, %eax
; X64-BMI2-NEXT: movl $16, %ecx
; X64-BMI2-NEXT: subl %esi, %ecx
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i16 16, %numlowbits
  %mask = shl i16 -1, %numhighbits
  %masked = and i16 %mask, %val
  ret i16 %masked
}

define i16 @clear_lowbits16_ic1_indexzext(i16 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_ic1_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movb $16, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_ic1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb $16, %cl
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_ic1_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl %di, %eax
; X64-NOBMI2-NEXT: movb $16, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_ic1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl %di, %eax
; X64-BMI2-NEXT: movb $16, %cl
; X64-BMI2-NEXT: subb %sil, %cl
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i8 16, %numlowbits
  %sh_prom = zext i8 %numhighbits to i16
  %mask = shl i16 -1, %sh_prom
  %masked = and i16 %mask, %val
  ret i16 %masked
}

define i16 @clear_lowbits16_ic2_load(i16* %w, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_ic2_load:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
; X86-NOBMI2-NEXT: movw $16, %cx
; X86-NOBMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_ic2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movzwl (%eax), %eax
; X86-BMI2-NEXT: movw $16, %cx
; X86-BMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_ic2_load:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
; X64-NOBMI2-NEXT: movl $16, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_ic2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
; X64-BMI2-NEXT: movl $16, %ecx
; X64-BMI2-NEXT: subl %esi, %ecx
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %val = load i16, i16* %w
  %numhighbits = sub i16 16, %numlowbits
  %mask = shl i16 -1, %numhighbits
  %masked = and i16 %mask, %val
  ret i16 %masked
}

define i16 @clear_lowbits16_ic3_load_indexzext(i16* %w, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
; X86-NOBMI2-NEXT: movb $16, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movzwl (%eax), %eax
; X86-BMI2-NEXT: movb $16, %cl
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
; X64-NOBMI2-NEXT: movb $16, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
; X64-BMI2-NEXT: movb $16, %cl
; X64-BMI2-NEXT: subb %sil, %cl
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %val = load i16, i16* %w
  %numhighbits = sub i8 16, %numlowbits
  %sh_prom = zext i8 %numhighbits to i16
  %mask = shl i16 -1, %sh_prom
  %masked = and i16 %mask, %val
  ret i16 %masked
}

define i16
@clear_lowbits16_ic4_commutative(i16 %val, i16 %numlowbits) nounwind { 1058 ; X86-NOBMI2-LABEL: clear_lowbits16_ic4_commutative: 1059 ; X86-NOBMI2: # %bb.0: 1060 ; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1061 ; X86-NOBMI2-NEXT: movw $16, %cx 1062 ; X86-NOBMI2-NEXT: subw {{[0-9]+}}(%esp), %cx 1063 ; X86-NOBMI2-NEXT: shrl %cl, %eax 1064 ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx 1065 ; X86-NOBMI2-NEXT: shll %cl, %eax 1066 ; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 1067 ; X86-NOBMI2-NEXT: retl 1068 ; 1069 ; X86-BMI2-LABEL: clear_lowbits16_ic4_commutative: 1070 ; X86-BMI2: # %bb.0: 1071 ; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1072 ; X86-BMI2-NEXT: movw $16, %cx 1073 ; X86-BMI2-NEXT: subw {{[0-9]+}}(%esp), %cx 1074 ; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax 1075 ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 1076 ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 1077 ; X86-BMI2-NEXT: retl 1078 ; 1079 ; X64-NOBMI2-LABEL: clear_lowbits16_ic4_commutative: 1080 ; X64-NOBMI2: # %bb.0: 1081 ; X64-NOBMI2-NEXT: movzwl %di, %eax 1082 ; X64-NOBMI2-NEXT: movl $16, %ecx 1083 ; X64-NOBMI2-NEXT: subl %esi, %ecx 1084 ; X64-NOBMI2-NEXT: shrl %cl, %eax 1085 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 1086 ; X64-NOBMI2-NEXT: shll %cl, %eax 1087 ; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 1088 ; X64-NOBMI2-NEXT: retq 1089 ; 1090 ; X64-BMI2-LABEL: clear_lowbits16_ic4_commutative: 1091 ; X64-BMI2: # %bb.0: 1092 ; X64-BMI2-NEXT: movzwl %di, %eax 1093 ; X64-BMI2-NEXT: movl $16, %ecx 1094 ; X64-BMI2-NEXT: subl %esi, %ecx 1095 ; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax 1096 ; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax 1097 ; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 1098 ; X64-BMI2-NEXT: retq 1099 %numhighbits = sub i16 16, %numlowbits 1100 %mask = shl i16 -1, %numhighbits 1101 %masked = and i16 %val, %mask ; swapped order 1102 ret i16 %masked 1103 } 1104 1105 ; 32-bit 1106 1107 define i32 @clear_lowbits32_ic0(i32 %val, i32 
%numlowbits) nounwind { 1108 ; X86-NOBMI2-LABEL: clear_lowbits32_ic0: 1109 ; X86-NOBMI2: # %bb.0: 1110 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1111 ; X86-NOBMI2-NEXT: movl $32, %ecx 1112 ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1113 ; X86-NOBMI2-NEXT: shrl %cl, %eax 1114 ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 1115 ; X86-NOBMI2-NEXT: shll %cl, %eax 1116 ; X86-NOBMI2-NEXT: retl 1117 ; 1118 ; X86-BMI2-LABEL: clear_lowbits32_ic0: 1119 ; X86-BMI2: # %bb.0: 1120 ; X86-BMI2-NEXT: movl $32, %eax 1121 ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax 1122 ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx 1123 ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax 1124 ; X86-BMI2-NEXT: retl 1125 ; 1126 ; X64-NOBMI2-LABEL: clear_lowbits32_ic0: 1127 ; X64-NOBMI2: # %bb.0: 1128 ; X64-NOBMI2-NEXT: movl $32, %ecx 1129 ; X64-NOBMI2-NEXT: subl %esi, %ecx 1130 ; X64-NOBMI2-NEXT: shrl %cl, %edi 1131 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 1132 ; X64-NOBMI2-NEXT: shll %cl, %edi 1133 ; X64-NOBMI2-NEXT: movl %edi, %eax 1134 ; X64-NOBMI2-NEXT: retq 1135 ; 1136 ; X64-BMI2-LABEL: clear_lowbits32_ic0: 1137 ; X64-BMI2: # %bb.0: 1138 ; X64-BMI2-NEXT: movl $32, %eax 1139 ; X64-BMI2-NEXT: subl %esi, %eax 1140 ; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx 1141 ; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax 1142 ; X64-BMI2-NEXT: retq 1143 %numhighbits = sub i32 32, %numlowbits 1144 %mask = shl i32 -1, %numhighbits 1145 %masked = and i32 %mask, %val 1146 ret i32 %masked 1147 } 1148 1149 define i32 @clear_lowbits32_ic1_indexzext(i32 %val, i8 %numlowbits) nounwind { 1150 ; X86-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext: 1151 ; X86-NOBMI2: # %bb.0: 1152 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1153 ; X86-NOBMI2-NEXT: movb $32, %cl 1154 ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 1155 ; X86-NOBMI2-NEXT: shrl %cl, %eax 1156 ; X86-NOBMI2-NEXT: shll %cl, %eax 1157 ; X86-NOBMI2-NEXT: retl 1158 ; 1159 ; X86-BMI2-LABEL: clear_lowbits32_ic1_indexzext: 1160 ; X86-BMI2: # %bb.0: 
1161 ; X86-BMI2-NEXT: movb $32, %al 1162 ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al 1163 ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx 1164 ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax 1165 ; X86-BMI2-NEXT: retl 1166 ; 1167 ; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext: 1168 ; X64-NOBMI2: # %bb.0: 1169 ; X64-NOBMI2-NEXT: movb $32, %cl 1170 ; X64-NOBMI2-NEXT: subb %sil, %cl 1171 ; X64-NOBMI2-NEXT: shrl %cl, %edi 1172 ; X64-NOBMI2-NEXT: shll %cl, %edi 1173 ; X64-NOBMI2-NEXT: movl %edi, %eax 1174 ; X64-NOBMI2-NEXT: retq 1175 ; 1176 ; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext: 1177 ; X64-BMI2: # %bb.0: 1178 ; X64-BMI2-NEXT: movb $32, %al 1179 ; X64-BMI2-NEXT: subb %sil, %al 1180 ; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx 1181 ; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax 1182 ; X64-BMI2-NEXT: retq 1183 %numhighbits = sub i8 32, %numlowbits 1184 %sh_prom = zext i8 %numhighbits to i32 1185 %mask = shl i32 -1, %sh_prom 1186 %masked = and i32 %mask, %val 1187 ret i32 %masked 1188 } 1189 1190 define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind { 1191 ; X86-NOBMI2-LABEL: clear_lowbits32_ic2_load: 1192 ; X86-NOBMI2: # %bb.0: 1193 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1194 ; X86-NOBMI2-NEXT: movl (%eax), %eax 1195 ; X86-NOBMI2-NEXT: movl $32, %ecx 1196 ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1197 ; X86-NOBMI2-NEXT: shrl %cl, %eax 1198 ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 1199 ; X86-NOBMI2-NEXT: shll %cl, %eax 1200 ; X86-NOBMI2-NEXT: retl 1201 ; 1202 ; X86-BMI2-LABEL: clear_lowbits32_ic2_load: 1203 ; X86-BMI2: # %bb.0: 1204 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1205 ; X86-BMI2-NEXT: movl $32, %ecx 1206 ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1207 ; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax 1208 ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 1209 ; X86-BMI2-NEXT: retl 1210 ; 1211 ; X64-NOBMI2-LABEL: clear_lowbits32_ic2_load: 1212 ; X64-NOBMI2: # %bb.0: 1213 ; X64-NOBMI2-NEXT: movl (%rdi), %eax 1214 ; X64-NOBMI2-NEXT: 
movl $32, %ecx 1215 ; X64-NOBMI2-NEXT: subl %esi, %ecx 1216 ; X64-NOBMI2-NEXT: shrl %cl, %eax 1217 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 1218 ; X64-NOBMI2-NEXT: shll %cl, %eax 1219 ; X64-NOBMI2-NEXT: retq 1220 ; 1221 ; X64-BMI2-LABEL: clear_lowbits32_ic2_load: 1222 ; X64-BMI2: # %bb.0: 1223 ; X64-BMI2-NEXT: movl $32, %eax 1224 ; X64-BMI2-NEXT: subl %esi, %eax 1225 ; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx 1226 ; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax 1227 ; X64-BMI2-NEXT: retq 1228 %val = load i32, i32* %w 1229 %numhighbits = sub i32 32, %numlowbits 1230 %mask = shl i32 -1, %numhighbits 1231 %masked = and i32 %mask, %val 1232 ret i32 %masked 1233 } 1234 1235 define i32 @clear_lowbits32_ic3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { 1236 ; X86-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext: 1237 ; X86-NOBMI2: # %bb.0: 1238 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1239 ; X86-NOBMI2-NEXT: movl (%eax), %eax 1240 ; X86-NOBMI2-NEXT: movb $32, %cl 1241 ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 1242 ; X86-NOBMI2-NEXT: shrl %cl, %eax 1243 ; X86-NOBMI2-NEXT: shll %cl, %eax 1244 ; X86-NOBMI2-NEXT: retl 1245 ; 1246 ; X86-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext: 1247 ; X86-BMI2: # %bb.0: 1248 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1249 ; X86-BMI2-NEXT: movb $32, %cl 1250 ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 1251 ; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax 1252 ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 1253 ; X86-BMI2-NEXT: retl 1254 ; 1255 ; X64-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext: 1256 ; X64-NOBMI2: # %bb.0: 1257 ; X64-NOBMI2-NEXT: movl (%rdi), %eax 1258 ; X64-NOBMI2-NEXT: movb $32, %cl 1259 ; X64-NOBMI2-NEXT: subb %sil, %cl 1260 ; X64-NOBMI2-NEXT: shrl %cl, %eax 1261 ; X64-NOBMI2-NEXT: shll %cl, %eax 1262 ; X64-NOBMI2-NEXT: retq 1263 ; 1264 ; X64-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext: 1265 ; X64-BMI2: # %bb.0: 1266 ; X64-BMI2-NEXT: movb $32, %al 1267 ; X64-BMI2-NEXT: subb %sil, %al 1268 ; 
X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx 1269 ; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax 1270 ; X64-BMI2-NEXT: retq 1271 %val = load i32, i32* %w 1272 %numhighbits = sub i8 32, %numlowbits 1273 %sh_prom = zext i8 %numhighbits to i32 1274 %mask = shl i32 -1, %sh_prom 1275 %masked = and i32 %mask, %val 1276 ret i32 %masked 1277 } 1278 1279 define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind { 1280 ; X86-NOBMI2-LABEL: clear_lowbits32_ic4_commutative: 1281 ; X86-NOBMI2: # %bb.0: 1282 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1283 ; X86-NOBMI2-NEXT: movl $32, %ecx 1284 ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1285 ; X86-NOBMI2-NEXT: shrl %cl, %eax 1286 ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 1287 ; X86-NOBMI2-NEXT: shll %cl, %eax 1288 ; X86-NOBMI2-NEXT: retl 1289 ; 1290 ; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative: 1291 ; X86-BMI2: # %bb.0: 1292 ; X86-BMI2-NEXT: movl $32, %eax 1293 ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax 1294 ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx 1295 ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax 1296 ; X86-BMI2-NEXT: retl 1297 ; 1298 ; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative: 1299 ; X64-NOBMI2: # %bb.0: 1300 ; X64-NOBMI2-NEXT: movl $32, %ecx 1301 ; X64-NOBMI2-NEXT: subl %esi, %ecx 1302 ; X64-NOBMI2-NEXT: shrl %cl, %edi 1303 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 1304 ; X64-NOBMI2-NEXT: shll %cl, %edi 1305 ; X64-NOBMI2-NEXT: movl %edi, %eax 1306 ; X64-NOBMI2-NEXT: retq 1307 ; 1308 ; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative: 1309 ; X64-BMI2: # %bb.0: 1310 ; X64-BMI2-NEXT: movl $32, %eax 1311 ; X64-BMI2-NEXT: subl %esi, %eax 1312 ; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx 1313 ; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax 1314 ; X64-BMI2-NEXT: retq 1315 %numhighbits = sub i32 32, %numlowbits 1316 %mask = shl i32 -1, %numhighbits 1317 %masked = and i32 %val, %mask ; swapped order 1318 ret i32 %masked 1319 } 1320 1321 ; 64-bit 1322 1323 define i64 
@clear_lowbits64_ic0(i64 %val, i64 %numlowbits) nounwind { 1324 ; X86-NOBMI2-LABEL: clear_lowbits64_ic0: 1325 ; X86-NOBMI2: # %bb.0: 1326 ; X86-NOBMI2-NEXT: movl $64, %ecx 1327 ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1328 ; X86-NOBMI2-NEXT: movl $-1, %edx 1329 ; X86-NOBMI2-NEXT: movl $-1, %eax 1330 ; X86-NOBMI2-NEXT: shll %cl, %eax 1331 ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx 1332 ; X86-NOBMI2-NEXT: testb $32, %cl 1333 ; X86-NOBMI2-NEXT: je .LBB31_2 1334 ; X86-NOBMI2-NEXT: # %bb.1: 1335 ; X86-NOBMI2-NEXT: movl %eax, %edx 1336 ; X86-NOBMI2-NEXT: xorl %eax, %eax 1337 ; X86-NOBMI2-NEXT: .LBB31_2: 1338 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 1339 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 1340 ; X86-NOBMI2-NEXT: retl 1341 ; 1342 ; X86-BMI2-LABEL: clear_lowbits64_ic0: 1343 ; X86-BMI2: # %bb.0: 1344 ; X86-BMI2-NEXT: movl $64, %ecx 1345 ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1346 ; X86-BMI2-NEXT: movl $-1, %edx 1347 ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax 1348 ; X86-BMI2-NEXT: shldl %cl, %edx, %edx 1349 ; X86-BMI2-NEXT: testb $32, %cl 1350 ; X86-BMI2-NEXT: je .LBB31_2 1351 ; X86-BMI2-NEXT: # %bb.1: 1352 ; X86-BMI2-NEXT: movl %eax, %edx 1353 ; X86-BMI2-NEXT: xorl %eax, %eax 1354 ; X86-BMI2-NEXT: .LBB31_2: 1355 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 1356 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 1357 ; X86-BMI2-NEXT: retl 1358 ; 1359 ; X64-NOBMI2-LABEL: clear_lowbits64_ic0: 1360 ; X64-NOBMI2: # %bb.0: 1361 ; X64-NOBMI2-NEXT: movl $64, %ecx 1362 ; X64-NOBMI2-NEXT: subl %esi, %ecx 1363 ; X64-NOBMI2-NEXT: shrq %cl, %rdi 1364 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 1365 ; X64-NOBMI2-NEXT: shlq %cl, %rdi 1366 ; X64-NOBMI2-NEXT: movq %rdi, %rax 1367 ; X64-NOBMI2-NEXT: retq 1368 ; 1369 ; X64-BMI2-LABEL: clear_lowbits64_ic0: 1370 ; X64-BMI2: # %bb.0: 1371 ; X64-BMI2-NEXT: movl $64, %eax 1372 ; X64-BMI2-NEXT: subl %esi, %eax 1373 ; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx 1374 ; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax 1375 ; 
X64-BMI2-NEXT: retq 1376 %numhighbits = sub i64 64, %numlowbits 1377 %mask = shl i64 -1, %numhighbits 1378 %masked = and i64 %mask, %val 1379 ret i64 %masked 1380 } 1381 1382 define i64 @clear_lowbits64_ic1_indexzext(i64 %val, i8 %numlowbits) nounwind { 1383 ; X86-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext: 1384 ; X86-NOBMI2: # %bb.0: 1385 ; X86-NOBMI2-NEXT: movb $64, %cl 1386 ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 1387 ; X86-NOBMI2-NEXT: movl $-1, %edx 1388 ; X86-NOBMI2-NEXT: movl $-1, %eax 1389 ; X86-NOBMI2-NEXT: shll %cl, %eax 1390 ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx 1391 ; X86-NOBMI2-NEXT: testb $32, %cl 1392 ; X86-NOBMI2-NEXT: je .LBB32_2 1393 ; X86-NOBMI2-NEXT: # %bb.1: 1394 ; X86-NOBMI2-NEXT: movl %eax, %edx 1395 ; X86-NOBMI2-NEXT: xorl %eax, %eax 1396 ; X86-NOBMI2-NEXT: .LBB32_2: 1397 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 1398 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 1399 ; X86-NOBMI2-NEXT: retl 1400 ; 1401 ; X86-BMI2-LABEL: clear_lowbits64_ic1_indexzext: 1402 ; X86-BMI2: # %bb.0: 1403 ; X86-BMI2-NEXT: movb $64, %cl 1404 ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 1405 ; X86-BMI2-NEXT: movl $-1, %edx 1406 ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax 1407 ; X86-BMI2-NEXT: shldl %cl, %edx, %edx 1408 ; X86-BMI2-NEXT: testb $32, %cl 1409 ; X86-BMI2-NEXT: je .LBB32_2 1410 ; X86-BMI2-NEXT: # %bb.1: 1411 ; X86-BMI2-NEXT: movl %eax, %edx 1412 ; X86-BMI2-NEXT: xorl %eax, %eax 1413 ; X86-BMI2-NEXT: .LBB32_2: 1414 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 1415 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 1416 ; X86-BMI2-NEXT: retl 1417 ; 1418 ; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext: 1419 ; X64-NOBMI2: # %bb.0: 1420 ; X64-NOBMI2-NEXT: movb $64, %cl 1421 ; X64-NOBMI2-NEXT: subb %sil, %cl 1422 ; X64-NOBMI2-NEXT: shrq %cl, %rdi 1423 ; X64-NOBMI2-NEXT: shlq %cl, %rdi 1424 ; X64-NOBMI2-NEXT: movq %rdi, %rax 1425 ; X64-NOBMI2-NEXT: retq 1426 ; 1427 ; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext: 1428 ; X64-BMI2: # %bb.0: 1429 ; 
X64-BMI2-NEXT: movb $64, %al 1430 ; X64-BMI2-NEXT: subb %sil, %al 1431 ; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx 1432 ; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax 1433 ; X64-BMI2-NEXT: retq 1434 %numhighbits = sub i8 64, %numlowbits 1435 %sh_prom = zext i8 %numhighbits to i64 1436 %mask = shl i64 -1, %sh_prom 1437 %masked = and i64 %mask, %val 1438 ret i64 %masked 1439 } 1440 1441 define i64 @clear_lowbits64_ic2_load(i64* %w, i64 %numlowbits) nounwind { 1442 ; X86-NOBMI2-LABEL: clear_lowbits64_ic2_load: 1443 ; X86-NOBMI2: # %bb.0: 1444 ; X86-NOBMI2-NEXT: pushl %esi 1445 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 1446 ; X86-NOBMI2-NEXT: movl $64, %ecx 1447 ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1448 ; X86-NOBMI2-NEXT: movl $-1, %edx 1449 ; X86-NOBMI2-NEXT: movl $-1, %eax 1450 ; X86-NOBMI2-NEXT: shll %cl, %eax 1451 ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx 1452 ; X86-NOBMI2-NEXT: testb $32, %cl 1453 ; X86-NOBMI2-NEXT: je .LBB33_2 1454 ; X86-NOBMI2-NEXT: # %bb.1: 1455 ; X86-NOBMI2-NEXT: movl %eax, %edx 1456 ; X86-NOBMI2-NEXT: xorl %eax, %eax 1457 ; X86-NOBMI2-NEXT: .LBB33_2: 1458 ; X86-NOBMI2-NEXT: andl 4(%esi), %edx 1459 ; X86-NOBMI2-NEXT: andl (%esi), %eax 1460 ; X86-NOBMI2-NEXT: popl %esi 1461 ; X86-NOBMI2-NEXT: retl 1462 ; 1463 ; X86-BMI2-LABEL: clear_lowbits64_ic2_load: 1464 ; X86-BMI2: # %bb.0: 1465 ; X86-BMI2-NEXT: pushl %esi 1466 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 1467 ; X86-BMI2-NEXT: movl $64, %ecx 1468 ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1469 ; X86-BMI2-NEXT: movl $-1, %edx 1470 ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax 1471 ; X86-BMI2-NEXT: shldl %cl, %edx, %edx 1472 ; X86-BMI2-NEXT: testb $32, %cl 1473 ; X86-BMI2-NEXT: je .LBB33_2 1474 ; X86-BMI2-NEXT: # %bb.1: 1475 ; X86-BMI2-NEXT: movl %eax, %edx 1476 ; X86-BMI2-NEXT: xorl %eax, %eax 1477 ; X86-BMI2-NEXT: .LBB33_2: 1478 ; X86-BMI2-NEXT: andl 4(%esi), %edx 1479 ; X86-BMI2-NEXT: andl (%esi), %eax 1480 ; X86-BMI2-NEXT: popl %esi 1481 ; X86-BMI2-NEXT: retl 1482 ; 1483 ; 
X64-NOBMI2-LABEL: clear_lowbits64_ic2_load: 1484 ; X64-NOBMI2: # %bb.0: 1485 ; X64-NOBMI2-NEXT: movq (%rdi), %rax 1486 ; X64-NOBMI2-NEXT: movl $64, %ecx 1487 ; X64-NOBMI2-NEXT: subl %esi, %ecx 1488 ; X64-NOBMI2-NEXT: shrq %cl, %rax 1489 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 1490 ; X64-NOBMI2-NEXT: shlq %cl, %rax 1491 ; X64-NOBMI2-NEXT: retq 1492 ; 1493 ; X64-BMI2-LABEL: clear_lowbits64_ic2_load: 1494 ; X64-BMI2: # %bb.0: 1495 ; X64-BMI2-NEXT: movl $64, %eax 1496 ; X64-BMI2-NEXT: subl %esi, %eax 1497 ; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx 1498 ; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax 1499 ; X64-BMI2-NEXT: retq 1500 %val = load i64, i64* %w 1501 %numhighbits = sub i64 64, %numlowbits 1502 %mask = shl i64 -1, %numhighbits 1503 %masked = and i64 %mask, %val 1504 ret i64 %masked 1505 } 1506 1507 define i64 @clear_lowbits64_ic3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { 1508 ; X86-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext: 1509 ; X86-NOBMI2: # %bb.0: 1510 ; X86-NOBMI2-NEXT: pushl %esi 1511 ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 1512 ; X86-NOBMI2-NEXT: movb $64, %cl 1513 ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 1514 ; X86-NOBMI2-NEXT: movl $-1, %edx 1515 ; X86-NOBMI2-NEXT: movl $-1, %eax 1516 ; X86-NOBMI2-NEXT: shll %cl, %eax 1517 ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx 1518 ; X86-NOBMI2-NEXT: testb $32, %cl 1519 ; X86-NOBMI2-NEXT: je .LBB34_2 1520 ; X86-NOBMI2-NEXT: # %bb.1: 1521 ; X86-NOBMI2-NEXT: movl %eax, %edx 1522 ; X86-NOBMI2-NEXT: xorl %eax, %eax 1523 ; X86-NOBMI2-NEXT: .LBB34_2: 1524 ; X86-NOBMI2-NEXT: andl 4(%esi), %edx 1525 ; X86-NOBMI2-NEXT: andl (%esi), %eax 1526 ; X86-NOBMI2-NEXT: popl %esi 1527 ; X86-NOBMI2-NEXT: retl 1528 ; 1529 ; X86-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext: 1530 ; X86-BMI2: # %bb.0: 1531 ; X86-BMI2-NEXT: pushl %esi 1532 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 1533 ; X86-BMI2-NEXT: movb $64, %cl 1534 ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 1535 ; X86-BMI2-NEXT: movl $-1, 
%edx 1536 ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax 1537 ; X86-BMI2-NEXT: shldl %cl, %edx, %edx 1538 ; X86-BMI2-NEXT: testb $32, %cl 1539 ; X86-BMI2-NEXT: je .LBB34_2 1540 ; X86-BMI2-NEXT: # %bb.1: 1541 ; X86-BMI2-NEXT: movl %eax, %edx 1542 ; X86-BMI2-NEXT: xorl %eax, %eax 1543 ; X86-BMI2-NEXT: .LBB34_2: 1544 ; X86-BMI2-NEXT: andl 4(%esi), %edx 1545 ; X86-BMI2-NEXT: andl (%esi), %eax 1546 ; X86-BMI2-NEXT: popl %esi 1547 ; X86-BMI2-NEXT: retl 1548 ; 1549 ; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext: 1550 ; X64-NOBMI2: # %bb.0: 1551 ; X64-NOBMI2-NEXT: movq (%rdi), %rax 1552 ; X64-NOBMI2-NEXT: movb $64, %cl 1553 ; X64-NOBMI2-NEXT: subb %sil, %cl 1554 ; X64-NOBMI2-NEXT: shrq %cl, %rax 1555 ; X64-NOBMI2-NEXT: shlq %cl, %rax 1556 ; X64-NOBMI2-NEXT: retq 1557 ; 1558 ; X64-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext: 1559 ; X64-BMI2: # %bb.0: 1560 ; X64-BMI2-NEXT: movb $64, %al 1561 ; X64-BMI2-NEXT: subb %sil, %al 1562 ; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx 1563 ; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax 1564 ; X64-BMI2-NEXT: retq 1565 %val = load i64, i64* %w 1566 %numhighbits = sub i8 64, %numlowbits 1567 %sh_prom = zext i8 %numhighbits to i64 1568 %mask = shl i64 -1, %sh_prom 1569 %masked = and i64 %mask, %val 1570 ret i64 %masked 1571 } 1572 1573 define i64 @clear_lowbits64_ic4_commutative(i64 %val, i64 %numlowbits) nounwind { 1574 ; X86-NOBMI2-LABEL: clear_lowbits64_ic4_commutative: 1575 ; X86-NOBMI2: # %bb.0: 1576 ; X86-NOBMI2-NEXT: movl $64, %ecx 1577 ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1578 ; X86-NOBMI2-NEXT: movl $-1, %edx 1579 ; X86-NOBMI2-NEXT: movl $-1, %eax 1580 ; X86-NOBMI2-NEXT: shll %cl, %eax 1581 ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx 1582 ; X86-NOBMI2-NEXT: testb $32, %cl 1583 ; X86-NOBMI2-NEXT: je .LBB35_2 1584 ; X86-NOBMI2-NEXT: # %bb.1: 1585 ; X86-NOBMI2-NEXT: movl %eax, %edx 1586 ; X86-NOBMI2-NEXT: xorl %eax, %eax 1587 ; X86-NOBMI2-NEXT: .LBB35_2: 1588 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 1589 ; X86-NOBMI2-NEXT: andl 
{{[0-9]+}}(%esp), %eax 1590 ; X86-NOBMI2-NEXT: retl 1591 ; 1592 ; X86-BMI2-LABEL: clear_lowbits64_ic4_commutative: 1593 ; X86-BMI2: # %bb.0: 1594 ; X86-BMI2-NEXT: movl $64, %ecx 1595 ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1596 ; X86-BMI2-NEXT: movl $-1, %edx 1597 ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax 1598 ; X86-BMI2-NEXT: shldl %cl, %edx, %edx 1599 ; X86-BMI2-NEXT: testb $32, %cl 1600 ; X86-BMI2-NEXT: je .LBB35_2 1601 ; X86-BMI2-NEXT: # %bb.1: 1602 ; X86-BMI2-NEXT: movl %eax, %edx 1603 ; X86-BMI2-NEXT: xorl %eax, %eax 1604 ; X86-BMI2-NEXT: .LBB35_2: 1605 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 1606 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 1607 ; X86-BMI2-NEXT: retl 1608 ; 1609 ; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative: 1610 ; X64-NOBMI2: # %bb.0: 1611 ; X64-NOBMI2-NEXT: movl $64, %ecx 1612 ; X64-NOBMI2-NEXT: subl %esi, %ecx 1613 ; X64-NOBMI2-NEXT: shrq %cl, %rdi 1614 ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 1615 ; X64-NOBMI2-NEXT: shlq %cl, %rdi 1616 ; X64-NOBMI2-NEXT: movq %rdi, %rax 1617 ; X64-NOBMI2-NEXT: retq 1618 ; 1619 ; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative: 1620 ; X64-BMI2: # %bb.0: 1621 ; X64-BMI2-NEXT: movl $64, %eax 1622 ; X64-BMI2-NEXT: subl %esi, %eax 1623 ; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx 1624 ; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax 1625 ; X64-BMI2-NEXT: retq 1626 %numhighbits = sub i64 64, %numlowbits 1627 %mask = shl i64 -1, %numhighbits 1628 %masked = and i64 %val, %mask ; swapped order 1629 ret i64 %masked 1630 } 1631 1632 ; ---------------------------------------------------------------------------- ; 1633 ; Multi-use tests 1634 ; ---------------------------------------------------------------------------- ; 1635 1636 declare void @use32(i32) 1637 declare void @use64(i64) 1638 1639 define i32 @oneuse32(i32 %val, i32 %numlowbits) nounwind { 1640 ; X86-NOBMI2-LABEL: oneuse32: 1641 ; X86-NOBMI2: # %bb.0: 1642 ; X86-NOBMI2-NEXT: pushl %esi 1643 ; X86-NOBMI2-NEXT: subl $8, %esp 1644 ; 
X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 1645 ; X86-NOBMI2-NEXT: movl $-1, %esi 1646 ; X86-NOBMI2-NEXT: shll %cl, %esi 1647 ; X86-NOBMI2-NEXT: movl %esi, (%esp) 1648 ; X86-NOBMI2-NEXT: calll use32 1649 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi 1650 ; X86-NOBMI2-NEXT: movl %esi, %eax 1651 ; X86-NOBMI2-NEXT: addl $8, %esp 1652 ; X86-NOBMI2-NEXT: popl %esi 1653 ; X86-NOBMI2-NEXT: retl 1654 ; 1655 ; X86-BMI2-LABEL: oneuse32: 1656 ; X86-BMI2: # %bb.0: 1657 ; X86-BMI2-NEXT: pushl %esi 1658 ; X86-BMI2-NEXT: subl $8, %esp 1659 ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 1660 ; X86-BMI2-NEXT: movl $-1, %ecx 1661 ; X86-BMI2-NEXT: shlxl %eax, %ecx, %esi 1662 ; X86-BMI2-NEXT: movl %esi, (%esp) 1663 ; X86-BMI2-NEXT: calll use32 1664 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi 1665 ; X86-BMI2-NEXT: movl %esi, %eax 1666 ; X86-BMI2-NEXT: addl $8, %esp 1667 ; X86-BMI2-NEXT: popl %esi 1668 ; X86-BMI2-NEXT: retl 1669 ; 1670 ; X64-NOBMI2-LABEL: oneuse32: 1671 ; X64-NOBMI2: # %bb.0: 1672 ; X64-NOBMI2-NEXT: pushq %rbp 1673 ; X64-NOBMI2-NEXT: pushq %rbx 1674 ; X64-NOBMI2-NEXT: pushq %rax 1675 ; X64-NOBMI2-NEXT: movl %edi, %ebx 1676 ; X64-NOBMI2-NEXT: movl $-1, %ebp 1677 ; X64-NOBMI2-NEXT: movl %esi, %ecx 1678 ; X64-NOBMI2-NEXT: shll %cl, %ebp 1679 ; X64-NOBMI2-NEXT: movl %ebp, %edi 1680 ; X64-NOBMI2-NEXT: callq use32 1681 ; X64-NOBMI2-NEXT: andl %ebx, %ebp 1682 ; X64-NOBMI2-NEXT: movl %ebp, %eax 1683 ; X64-NOBMI2-NEXT: addq $8, %rsp 1684 ; X64-NOBMI2-NEXT: popq %rbx 1685 ; X64-NOBMI2-NEXT: popq %rbp 1686 ; X64-NOBMI2-NEXT: retq 1687 ; 1688 ; X64-BMI2-LABEL: oneuse32: 1689 ; X64-BMI2: # %bb.0: 1690 ; X64-BMI2-NEXT: pushq %rbp 1691 ; X64-BMI2-NEXT: pushq %rbx 1692 ; X64-BMI2-NEXT: pushq %rax 1693 ; X64-BMI2-NEXT: movl %edi, %ebx 1694 ; X64-BMI2-NEXT: movl $-1, %eax 1695 ; X64-BMI2-NEXT: shlxl %esi, %eax, %ebp 1696 ; X64-BMI2-NEXT: movl %ebp, %edi 1697 ; X64-BMI2-NEXT: callq use32 1698 ; X64-BMI2-NEXT: andl %ebx, %ebp 1699 ; X64-BMI2-NEXT: movl %ebp, %eax 1700 ; X64-BMI2-NEXT: addq 
$8, %rsp 1701 ; X64-BMI2-NEXT: popq %rbx 1702 ; X64-BMI2-NEXT: popq %rbp 1703 ; X64-BMI2-NEXT: retq 1704 %mask = shl i32 -1, %numlowbits 1705 call void @use32(i32 %mask) 1706 %masked = and i32 %mask, %val 1707 ret i32 %masked 1708 } 1709 1710 define i64 @oneuse64(i64 %val, i64 %numlowbits) nounwind { 1711 ; X86-NOBMI2-LABEL: oneuse64: 1712 ; X86-NOBMI2: # %bb.0: 1713 ; X86-NOBMI2-NEXT: pushl %edi 1714 ; X86-NOBMI2-NEXT: pushl %esi 1715 ; X86-NOBMI2-NEXT: pushl %eax 1716 ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 1717 ; X86-NOBMI2-NEXT: movl $-1, %esi 1718 ; X86-NOBMI2-NEXT: movl $-1, %edi 1719 ; X86-NOBMI2-NEXT: shll %cl, %edi 1720 ; X86-NOBMI2-NEXT: shldl %cl, %esi, %esi 1721 ; X86-NOBMI2-NEXT: testb $32, %cl 1722 ; X86-NOBMI2-NEXT: je .LBB37_2 1723 ; X86-NOBMI2-NEXT: # %bb.1: 1724 ; X86-NOBMI2-NEXT: movl %edi, %esi 1725 ; X86-NOBMI2-NEXT: xorl %edi, %edi 1726 ; X86-NOBMI2-NEXT: .LBB37_2: 1727 ; X86-NOBMI2-NEXT: subl $8, %esp 1728 ; X86-NOBMI2-NEXT: pushl %esi 1729 ; X86-NOBMI2-NEXT: pushl %edi 1730 ; X86-NOBMI2-NEXT: calll use64 1731 ; X86-NOBMI2-NEXT: addl $16, %esp 1732 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi 1733 ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edi 1734 ; X86-NOBMI2-NEXT: movl %edi, %eax 1735 ; X86-NOBMI2-NEXT: movl %esi, %edx 1736 ; X86-NOBMI2-NEXT: addl $4, %esp 1737 ; X86-NOBMI2-NEXT: popl %esi 1738 ; X86-NOBMI2-NEXT: popl %edi 1739 ; X86-NOBMI2-NEXT: retl 1740 ; 1741 ; X86-BMI2-LABEL: oneuse64: 1742 ; X86-BMI2: # %bb.0: 1743 ; X86-BMI2-NEXT: pushl %edi 1744 ; X86-BMI2-NEXT: pushl %esi 1745 ; X86-BMI2-NEXT: pushl %eax 1746 ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 1747 ; X86-BMI2-NEXT: movl $-1, %esi 1748 ; X86-BMI2-NEXT: shlxl %ecx, %esi, %edi 1749 ; X86-BMI2-NEXT: shldl %cl, %esi, %esi 1750 ; X86-BMI2-NEXT: testb $32, %cl 1751 ; X86-BMI2-NEXT: je .LBB37_2 1752 ; X86-BMI2-NEXT: # %bb.1: 1753 ; X86-BMI2-NEXT: movl %edi, %esi 1754 ; X86-BMI2-NEXT: xorl %edi, %edi 1755 ; X86-BMI2-NEXT: .LBB37_2: 1756 ; X86-BMI2-NEXT: subl $8, %esp 1757 
; X86-BMI2-NEXT: pushl %esi 1758 ; X86-BMI2-NEXT: pushl %edi 1759 ; X86-BMI2-NEXT: calll use64 1760 ; X86-BMI2-NEXT: addl $16, %esp 1761 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi 1762 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi 1763 ; X86-BMI2-NEXT: movl %edi, %eax 1764 ; X86-BMI2-NEXT: movl %esi, %edx 1765 ; X86-BMI2-NEXT: addl $4, %esp 1766 ; X86-BMI2-NEXT: popl %esi 1767 ; X86-BMI2-NEXT: popl %edi 1768 ; X86-BMI2-NEXT: retl 1769 ; 1770 ; X64-NOBMI2-LABEL: oneuse64: 1771 ; X64-NOBMI2: # %bb.0: 1772 ; X64-NOBMI2-NEXT: pushq %r14 1773 ; X64-NOBMI2-NEXT: pushq %rbx 1774 ; X64-NOBMI2-NEXT: pushq %rax 1775 ; X64-NOBMI2-NEXT: movq %rdi, %r14 1776 ; X64-NOBMI2-NEXT: movq $-1, %rbx 1777 ; X64-NOBMI2-NEXT: movl %esi, %ecx 1778 ; X64-NOBMI2-NEXT: shlq %cl, %rbx 1779 ; X64-NOBMI2-NEXT: movq %rbx, %rdi 1780 ; X64-NOBMI2-NEXT: callq use64 1781 ; X64-NOBMI2-NEXT: andq %r14, %rbx 1782 ; X64-NOBMI2-NEXT: movq %rbx, %rax 1783 ; X64-NOBMI2-NEXT: addq $8, %rsp 1784 ; X64-NOBMI2-NEXT: popq %rbx 1785 ; X64-NOBMI2-NEXT: popq %r14 1786 ; X64-NOBMI2-NEXT: retq 1787 ; 1788 ; X64-BMI2-LABEL: oneuse64: 1789 ; X64-BMI2: # %bb.0: 1790 ; X64-BMI2-NEXT: pushq %r14 1791 ; X64-BMI2-NEXT: pushq %rbx 1792 ; X64-BMI2-NEXT: pushq %rax 1793 ; X64-BMI2-NEXT: movq %rdi, %r14 1794 ; X64-BMI2-NEXT: movq $-1, %rax 1795 ; X64-BMI2-NEXT: shlxq %rsi, %rax, %rbx 1796 ; X64-BMI2-NEXT: movq %rbx, %rdi 1797 ; X64-BMI2-NEXT: callq use64 1798 ; X64-BMI2-NEXT: andq %r14, %rbx 1799 ; X64-BMI2-NEXT: movq %rbx, %rax 1800 ; X64-BMI2-NEXT: addq $8, %rsp 1801 ; X64-BMI2-NEXT: popq %rbx 1802 ; X64-BMI2-NEXT: popq %r14 1803 ; X64-BMI2-NEXT: retq 1804 %mask = shl i64 -1, %numlowbits 1805 call void @use64(i64 %mask) 1806 %masked = and i64 %mask, %val 1807 ret i64 %masked 1808 } 1809