; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64

; Check that we recognize this idiom for rotation too:
; a << (b & (OpSize-1)) | a >> ((0 - b) & (OpSize-1))

; 32-bit rotate-left idiom: (a << (b & 31)) | (a >> (-b & 31)).
; Should lower to a single rotate instruction on both targets.
define i32 @rotate_left_32(i32 %a, i32 %b) {
; X86-LABEL: rotate_left_32:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = lshr i32 %a, %and3
  %or = or i32 %shl, %shr
  ret i32 %or
}

; Same idiom with the shift directions swapped: a rotate-right.
define i32 @rotate_right_32(i32 %a, i32 %b) {
; X86-LABEL: rotate_right_32:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %and = and i32 %b, 31
  %shl = lshr i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = shl i32 %a, %and3
  %or = or i32 %shl, %shr
  ret i32 %or
}

; 64-bit rotate-left. On i686 there is no 64-bit rotate, so the checks
; expect the expanded double-shift (shldl/shrdl) sequence with a
; "testb $32" branch for shift amounts >= 32; on x86-64 a single rolq.
define i64 @rotate_left_64(i64 %a, i64 %b) {
; X86-LABEL: rotate_left_64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shldl %cl, %esi, %edx
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB2_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %eax, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .LBB2_2:
; X86-NEXT: negl %ecx
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: shrl %cl, %ebx
; X86-NEXT: shrdl %cl, %edi, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB2_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: .LBB2_4:
; X86-NEXT: orl %ebx, %edx
; X86-NEXT: orl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_64:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rolq %cl, %rdi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: retq
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = lshr i64 %a, %and3
  %or = or i64 %shl, %shr
  ret i64 %or
}

; 64-bit rotate-right: expanded shrdl/shldl sequence on i686, rorq on x86-64.
define i64 @rotate_right_64(i64 %a, i64 %b) {
; X86-LABEL: rotate_right_64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %esi, %edx
; X86-NEXT: shrl %cl, %edx
; X86-NEXT: movl %edi, %eax
; X86-NEXT: shrdl %cl, %esi, %eax
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB3_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %edx, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB3_2:
; X86-NEXT: negl %ecx
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: shll %cl, %ebx
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB3_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: .LBB3_4:
; X86-NEXT: orl %esi, %edx
; X86-NEXT: orl %ebx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_64:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rorq %cl, %rdi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: retq
  %and = and i64 %b, 63
  %shl = lshr i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = shl i64 %a, %and3
  %or = or i64 %shl, %shr
  ret i64 %or
}

; Also check mem operand.

; Rotate of a loaded value stored back to the same address: should fold
; the load and store into a memory-operand rotate (roll/rorl on memory).
define void @rotate_left_m32(i32 *%pa, i32 %b) {
; X86-LABEL: rotate_left_m32:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_m32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: roll %cl, (%rdi)
; X64-NEXT: retq
  %a = load i32, i32* %pa, align 16
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = lshr i32 %a, %and3
  %or = or i32 %shl, %shr
  store i32 %or, i32* %pa, align 32
  ret void
}

; Memory-operand rotate-right (shift directions swapped vs. the left case).
define void @rotate_right_m32(i32 *%pa, i32 %b) {
; X86-LABEL: rotate_right_m32:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_m32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rorl %cl, (%rdi)
; X64-NEXT: retq
  %a = load i32, i32* %pa, align 16
  %and = and i32 %b, 31
  %shl = lshr i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = shl i32 %a, %and3
  %or = or i32 %shl, %shr
  store i32 %or, i32* %pa, align 32
  ret void
}

; 64-bit memory rotate-left: folded to rolq on memory for x86-64; i686
; expands to the two-register double-shift sequence with explicit
; loads/stores of both 32-bit halves.
define void @rotate_left_m64(i64 *%pa, i64 %b) {
; X86-LABEL: rotate_left_m64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl 4(%eax), %ebx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shll %cl, %esi
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: shldl %cl, %edx, %edi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB6_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %esi, %edi
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: .LBB6_2:
; X86-NEXT: negl %ecx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: shrl %cl, %ebp
; X86-NEXT: shrdl %cl, %ebx, %edx
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB6_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebp, %edx
; X86-NEXT: xorl %ebp, %ebp
; X86-NEXT: .LBB6_4:
; X86-NEXT: orl %ebp, %edi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_m64:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rolq %cl, (%rdi)
; X64-NEXT: retq
  %a = load i64, i64* %pa, align 16
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = lshr i64 %a, %and3
  %or = or i64 %shl, %shr
  store i64 %or, i64* %pa, align 64
  ret void
}

; 64-bit memory rotate-right: rorq on memory for x86-64, expanded on i686.
define void @rotate_right_m64(i64 *%pa, i64 %b) {
; X86-LABEL: rotate_right_m64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %ebx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shrl %cl, %esi
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: shrdl %cl, %edx, %edi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB7_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %esi, %edi
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: .LBB7_2:
; X86-NEXT: negl %ecx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: shll %cl, %ebp
; X86-NEXT: shldl %cl, %ebx, %edx
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB7_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebp, %edx
; X86-NEXT: xorl %ebp, %ebp
; X86-NEXT: .LBB7_4:
; X86-NEXT: orl %edx, %esi
; X86-NEXT: orl %ebp, %edi
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_m64:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rorq %cl, (%rdi)
; X64-NEXT: retq
  %a = load i64, i64* %pa, align 16
  %and = and i64 %b, 63
  %shl = lshr i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = shl i64 %a, %and3
  %or = or i64 %shl, %shr
  store i64 %or, i64* %pa, align 64
  ret void
}

; The next 8 tests include masks of the narrow width shift amounts that should be eliminated.
; These patterns are produced by instcombine after r310509.

; i8 rotate-left with explicit &7 masks on both shift amounts; the masks
; are redundant for a rotate and should be dropped, yielding a plain rolb.
define i8 @rotate_left_8(i8 %x, i32 %amount) {
; X86-LABEL: rotate_left_8:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: rolb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_8:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rolb %cl, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shl = shl i8 %x, %maskamt
  %shr = lshr i8 %x, %masksub
  %or = or i8 %shl, %shr
  ret i8 %or
}

; i8 rotate-right variant of the masked pattern: expect a single rorb.
define i8 @rotate_right_8(i8 %x, i32 %amount) {
; X86-LABEL: rotate_right_8:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: rorb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_8:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rorb %cl, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shr = lshr i8 %x, %maskamt
  %shl = shl i8 %x, %masksub
  %or = or i8 %shr, %shl
  ret i8 %or
}

; i16 rotate-left with &15 masks: expect a single rolw.
define i16 @rotate_left_16(i16 %x, i32 %amount) {
; X86-LABEL: rotate_left_16:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw %cl, %ax
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_16:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rolw %cl, %di
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shl = shl i16 %x, %maskamt
  %shr = lshr i16 %x, %masksub
  %or = or i16 %shl, %shr
  ret i16 %or
}

; i16 rotate-right with &15 masks: expect a single rorw.
define i16 @rotate_right_16(i16 %x, i32 %amount) {
; X86-LABEL: rotate_right_16:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorw %cl, %ax
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_16:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rorw %cl, %di
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shr = lshr i16 %x, %maskamt
  %shl = shl i16 %x, %masksub
  %or = or i16 %shr, %shl
  ret i16 %or
}

; Masked i8 rotate-left through memory: load+rotate+store should fold
; into a memory-operand rolb.
define void @rotate_left_m8(i8* %p, i32 %amount) {
; X86-LABEL: rotate_left_m8:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_m8:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rolb %cl, (%rdi)
; X64-NEXT: retq
  %x = load i8, i8* %p, align 1
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shl = shl i8 %x, %maskamt
  %shr = lshr i8 %x, %masksub
  %or = or i8 %shl, %shr
  store i8 %or, i8* %p, align 1
  ret void
}

; Masked i8 rotate-right through memory (shl by the negated amount,
; lshr by the original amount): expect a memory-operand rorb.
define void @rotate_right_m8(i8* %p, i32 %amount) {
; X86-LABEL: rotate_right_m8:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorb %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_m8:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rorb %cl, (%rdi)
; X64-NEXT: retq
  %x = load i8, i8* %p, align 1
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shl = shl i8 %x, %masksub
  %shr = lshr i8 %x, %maskamt
  %or = or i8 %shl, %shr
  store i8 %or, i8* %p, align 1
  ret void
}

; Masked i16 rotate-left through memory: expect a memory-operand rolw.
define void @rotate_left_m16(i16* %p, i32 %amount) {
; X86-LABEL: rotate_left_m16:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_m16:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rolw %cl, (%rdi)
; X64-NEXT: retq
  %x = load i16, i16* %p, align 1
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shl = shl i16 %x, %maskamt
  %shr = lshr i16 %x, %masksub
  %or = or i16 %shl, %shr
  store i16 %or, i16* %p, align 1
  ret void
}

; Masked i16 rotate-right through memory: expect a memory-operand rorw.
define void @rotate_right_m16(i16* %p, i32 %amount) {
; X86-LABEL: rotate_right_m16:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorw %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_m16:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: rorw %cl, (%rdi)
; X64-NEXT: retq
  %x = load i16, i16* %p, align 1
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shl = shl i16 %x, %masksub
  %shr = lshr i16 %x, %maskamt
  %or = or i16 %shl, %shr
  store i16 %or, i16* %p, align 1
  ret void
}

; Rotate amount masked with 30 (an even mask, not width-1): the rotate is
; still recognized, but one "andb $30" on the amount must remain.
define i32 @rotate_demanded_bits(i32, i32) {
; X86-LABEL: rotate_demanded_bits:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: andb $30, %cl
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotate_demanded_bits:
; X64: # %bb.0:
; X64-NEXT: andb $30, %sil
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %3 = and i32 %1, 30
  %4 = shl i32 %0, %3
  %5 = sub nsw i32 0, %3
  %6 = and i32 %5, 30
  %7 = lshr i32 %0, %6
  %8 = or i32 %7, %4
  ret i32 %8
}

; Mixed masks (23 on the shl amount, 31 on the negated lshr amount): the
; "andb $23" survives and the rotate is still formed.
define i32 @rotate_demanded_bits_2(i32, i32) {
; X86-LABEL: rotate_demanded_bits_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: andb $23, %cl
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotate_demanded_bits_2:
; X64: # %bb.0:
; X64-NEXT: andb $23, %sil
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %3 = and i32 %1, 23
  %4 = shl i32 %0, %3
  %5 = sub nsw i32 0, %3
  %6 = and i32 %5, 31
  %7 = lshr i32 %0, %6
  %8 = or i32 %7, %4
  ret i32 %8
}

; Amount is (b << 1) & 30 on one side and (-(b << 1)) & 30 on the other:
; the doubling (addb) and the &30 mask remain, followed by a single roll.
define i32 @rotate_demanded_bits_3(i32, i32) {
; X86-LABEL: rotate_demanded_bits_3:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: addb %cl, %cl
; X86-NEXT: andb $30, %cl
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotate_demanded_bits_3:
; X64: # %bb.0:
; X64-NEXT: addb %sil, %sil
; X64-NEXT: andb $30, %sil
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %3 = shl i32 %1, 1
  %4 = and i32 %3, 30
  %5 = shl i32 %0, %4
  %6 = sub i32 0, %3
  %7 = and i32 %6, 30
  %8 = lshr i32 %0, %7
  %9 = or i32 %5, %8
  ret i32 %9
}