; Home | History | Annotate | Download | only in X86  (code-viewer page header, not part of the test)
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
      4 
      5 ; Check that we recognize this idiom for rotation too:
      6 ;    a << (b & (OpSize-1)) | a >> ((0 - b) & (OpSize-1))
      7 
      8 define i32 @rotate_left_32(i32 %a, i32 %b) {
      9 ; X86-LABEL: rotate_left_32:
     10 ; X86:       # %bb.0:
     11 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
     12 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     13 ; X86-NEXT:    roll %cl, %eax
     14 ; X86-NEXT:    retl
     15 ;
     16 ; X64-LABEL: rotate_left_32:
     17 ; X64:       # %bb.0:
     18 ; X64-NEXT:    movl %esi, %ecx
     19 ; X64-NEXT:    roll %cl, %edi
     20 ; X64-NEXT:    movl %edi, %eax
     21 ; X64-NEXT:    retq
; Canonical rotate-left idiom: (a << (b & 31)) | (a >> (-b & 31)).
; Per the CHECK lines above, both targets must fold this to a single roll.
     22   %and = and i32 %b, 31
     23   %shl = shl i32 %a, %and
     24   %t0 = sub i32 0, %b
     25   %and3 = and i32 %t0, 31
     26   %shr = lshr i32 %a, %and3
     27   %or = or i32 %shl, %shr
     28   ret i32 %or
     29 }
     30 
     31 define i32 @rotate_right_32(i32 %a, i32 %b) {
     32 ; X86-LABEL: rotate_right_32:
     33 ; X86:       # %bb.0:
     34 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
     35 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     36 ; X86-NEXT:    rorl %cl, %eax
     37 ; X86-NEXT:    retl
     38 ;
     39 ; X64-LABEL: rotate_right_32:
     40 ; X64:       # %bb.0:
     41 ; X64-NEXT:    movl %esi, %ecx
     42 ; X64-NEXT:    rorl %cl, %edi
     43 ; X64-NEXT:    movl %edi, %eax
     44 ; X64-NEXT:    retq
; Right-rotate form: (a >> (b & 31)) | (a << (-b & 31)) --> rorl.
; NOTE: the SSA names are misleadingly swapped here (%shl holds the lshr,
; %shr holds the shl); kept as-is since CHECK lines were autogenerated.
     45   %and = and i32 %b, 31
     46   %shl = lshr i32 %a, %and
     47   %t0 = sub i32 0, %b
     48   %and3 = and i32 %t0, 31
     49   %shr = shl i32 %a, %and3
     50   %or = or i32 %shl, %shr
     51   ret i32 %or
     52 }
     53 
     54 define i64 @rotate_left_64(i64 %a, i64 %b) {
     55 ; X86-LABEL: rotate_left_64:
     56 ; X86:       # %bb.0:
     57 ; X86-NEXT:    pushl %ebx
     58 ; X86-NEXT:    .cfi_def_cfa_offset 8
     59 ; X86-NEXT:    pushl %edi
     60 ; X86-NEXT:    .cfi_def_cfa_offset 12
     61 ; X86-NEXT:    pushl %esi
     62 ; X86-NEXT:    .cfi_def_cfa_offset 16
     63 ; X86-NEXT:    .cfi_offset %esi, -16
     64 ; X86-NEXT:    .cfi_offset %edi, -12
     65 ; X86-NEXT:    .cfi_offset %ebx, -8
     66 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
     67 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
     68 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     69 ; X86-NEXT:    movl %esi, %eax
     70 ; X86-NEXT:    shll %cl, %eax
     71 ; X86-NEXT:    movl %edi, %edx
     72 ; X86-NEXT:    shldl %cl, %esi, %edx
     73 ; X86-NEXT:    testb $32, %cl
     74 ; X86-NEXT:    je .LBB2_2
     75 ; X86-NEXT:  # %bb.1:
     76 ; X86-NEXT:    movl %eax, %edx
     77 ; X86-NEXT:    xorl %eax, %eax
     78 ; X86-NEXT:  .LBB2_2:
     79 ; X86-NEXT:    negl %ecx
     80 ; X86-NEXT:    movl %edi, %ebx
     81 ; X86-NEXT:    shrl %cl, %ebx
     82 ; X86-NEXT:    shrdl %cl, %edi, %esi
     83 ; X86-NEXT:    testb $32, %cl
     84 ; X86-NEXT:    je .LBB2_4
     85 ; X86-NEXT:  # %bb.3:
     86 ; X86-NEXT:    movl %ebx, %esi
     87 ; X86-NEXT:    xorl %ebx, %ebx
     88 ; X86-NEXT:  .LBB2_4:
     89 ; X86-NEXT:    orl %ebx, %edx
     90 ; X86-NEXT:    orl %esi, %eax
     91 ; X86-NEXT:    popl %esi
     92 ; X86-NEXT:    .cfi_def_cfa_offset 12
     93 ; X86-NEXT:    popl %edi
     94 ; X86-NEXT:    .cfi_def_cfa_offset 8
     95 ; X86-NEXT:    popl %ebx
     96 ; X86-NEXT:    .cfi_def_cfa_offset 4
     97 ; X86-NEXT:    retl
     98 ;
     99 ; X64-LABEL: rotate_left_64:
    100 ; X64:       # %bb.0:
    101 ; X64-NEXT:    movl %esi, %ecx
    102 ; X64-NEXT:    rolq %cl, %rdi
    103 ; X64-NEXT:    movq %rdi, %rax
    104 ; X64-NEXT:    retq
; i64 rotate-left: x86-64 folds to a single rolq. i686 has no 64-bit rotate,
; so the CHECK lines show the i64-shift expansion instead: shll/shldl for the
; left half, negl + shrl/shrdl for the right half, with testb $32 branches
; handling shift amounts >= 32.
    105   %and = and i64 %b, 63
    106   %shl = shl i64 %a, %and
    107   %t0 = sub i64 0, %b
    108   %and3 = and i64 %t0, 63
    109   %shr = lshr i64 %a, %and3
    110   %or = or i64 %shl, %shr
    111   ret i64 %or
    112 }
    113 
    114 define i64 @rotate_right_64(i64 %a, i64 %b) {
    115 ; X86-LABEL: rotate_right_64:
    116 ; X86:       # %bb.0:
    117 ; X86-NEXT:    pushl %ebx
    118 ; X86-NEXT:    .cfi_def_cfa_offset 8
    119 ; X86-NEXT:    pushl %edi
    120 ; X86-NEXT:    .cfi_def_cfa_offset 12
    121 ; X86-NEXT:    pushl %esi
    122 ; X86-NEXT:    .cfi_def_cfa_offset 16
    123 ; X86-NEXT:    .cfi_offset %esi, -16
    124 ; X86-NEXT:    .cfi_offset %edi, -12
    125 ; X86-NEXT:    .cfi_offset %ebx, -8
    126 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
    127 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
    128 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    129 ; X86-NEXT:    movl %esi, %edx
    130 ; X86-NEXT:    shrl %cl, %edx
    131 ; X86-NEXT:    movl %edi, %eax
    132 ; X86-NEXT:    shrdl %cl, %esi, %eax
    133 ; X86-NEXT:    testb $32, %cl
    134 ; X86-NEXT:    je .LBB3_2
    135 ; X86-NEXT:  # %bb.1:
    136 ; X86-NEXT:    movl %edx, %eax
    137 ; X86-NEXT:    xorl %edx, %edx
    138 ; X86-NEXT:  .LBB3_2:
    139 ; X86-NEXT:    negl %ecx
    140 ; X86-NEXT:    movl %edi, %ebx
    141 ; X86-NEXT:    shll %cl, %ebx
    142 ; X86-NEXT:    shldl %cl, %edi, %esi
    143 ; X86-NEXT:    testb $32, %cl
    144 ; X86-NEXT:    je .LBB3_4
    145 ; X86-NEXT:  # %bb.3:
    146 ; X86-NEXT:    movl %ebx, %esi
    147 ; X86-NEXT:    xorl %ebx, %ebx
    148 ; X86-NEXT:  .LBB3_4:
    149 ; X86-NEXT:    orl %esi, %edx
    150 ; X86-NEXT:    orl %ebx, %eax
    151 ; X86-NEXT:    popl %esi
    152 ; X86-NEXT:    .cfi_def_cfa_offset 12
    153 ; X86-NEXT:    popl %edi
    154 ; X86-NEXT:    .cfi_def_cfa_offset 8
    155 ; X86-NEXT:    popl %ebx
    156 ; X86-NEXT:    .cfi_def_cfa_offset 4
    157 ; X86-NEXT:    retl
    158 ;
    159 ; X64-LABEL: rotate_right_64:
    160 ; X64:       # %bb.0:
    161 ; X64-NEXT:    movl %esi, %ecx
    162 ; X64-NEXT:    rorq %cl, %rdi
    163 ; X64-NEXT:    movq %rdi, %rax
    164 ; X64-NEXT:    retq
; i64 rotate-right: single rorq on x86-64; on i686 the CHECK lines show the
; mirrored expansion of rotate_left_64 (shrl/shrdl first, then negl +
; shll/shldl) with testb $32 branches for amounts >= 32.
; NOTE: as in rotate_right_32, %shl/%shr names are swapped vs. their opcodes.
    165   %and = and i64 %b, 63
    166   %shl = lshr i64 %a, %and
    167   %t0 = sub i64 0, %b
    168   %and3 = and i64 %t0, 63
    169   %shr = shl i64 %a, %and3
    170   %or = or i64 %shl, %shr
    171   ret i64 %or
    172 }
    173 
    174 ; Also check mem operand.
    175 
    176 define void @rotate_left_m32(i32 *%pa, i32 %b) {
    177 ; X86-LABEL: rotate_left_m32:
    178 ; X86:       # %bb.0:
    179 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    180 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    181 ; X86-NEXT:    roll %cl, (%eax)
    182 ; X86-NEXT:    retl
    183 ;
    184 ; X64-LABEL: rotate_left_m32:
    185 ; X64:       # %bb.0:
    186 ; X64-NEXT:    movl %esi, %ecx
    187 ; X64-NEXT:    roll %cl, (%rdi)
    188 ; X64-NEXT:    retq
; Same rotate-left idiom, but load/rotate/store through %pa: both targets
; must fold the memory operand into the rotate (roll %cl, (mem)).
    189   %a = load i32, i32* %pa, align 16
    190   %and = and i32 %b, 31
    191   %shl = shl i32 %a, %and
    192   %t0 = sub i32 0, %b
    193   %and3 = and i32 %t0, 31
    194   %shr = lshr i32 %a, %and3
    195   %or = or i32 %shl, %shr
    196   store i32 %or, i32* %pa, align 32
    197   ret void
    198 }
    199 
    200 define void @rotate_right_m32(i32 *%pa, i32 %b) {
    201 ; X86-LABEL: rotate_right_m32:
    202 ; X86:       # %bb.0:
    203 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    204 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    205 ; X86-NEXT:    rorl %cl, (%eax)
    206 ; X86-NEXT:    retl
    207 ;
    208 ; X64-LABEL: rotate_right_m32:
    209 ; X64:       # %bb.0:
    210 ; X64-NEXT:    movl %esi, %ecx
    211 ; X64-NEXT:    rorl %cl, (%rdi)
    212 ; X64-NEXT:    retq
; Rotate-right with memory operand: load/rotate/store must fold to a single
; rorl %cl, (mem) on both targets.
    213   %a = load i32, i32* %pa, align 16
    214   %and = and i32 %b, 31
    215   %shl = lshr i32 %a, %and
    216   %t0 = sub i32 0, %b
    217   %and3 = and i32 %t0, 31
    218   %shr = shl i32 %a, %and3
    219   %or = or i32 %shl, %shr
    220   store i32 %or, i32* %pa, align 32
    221   ret void
    222 }
    223 
    224 define void @rotate_left_m64(i64 *%pa, i64 %b) {
    225 ; X86-LABEL: rotate_left_m64:
    226 ; X86:       # %bb.0:
    227 ; X86-NEXT:    pushl %ebp
    228 ; X86-NEXT:    .cfi_def_cfa_offset 8
    229 ; X86-NEXT:    pushl %ebx
    230 ; X86-NEXT:    .cfi_def_cfa_offset 12
    231 ; X86-NEXT:    pushl %edi
    232 ; X86-NEXT:    .cfi_def_cfa_offset 16
    233 ; X86-NEXT:    pushl %esi
    234 ; X86-NEXT:    .cfi_def_cfa_offset 20
    235 ; X86-NEXT:    .cfi_offset %esi, -20
    236 ; X86-NEXT:    .cfi_offset %edi, -16
    237 ; X86-NEXT:    .cfi_offset %ebx, -12
    238 ; X86-NEXT:    .cfi_offset %ebp, -8
    239 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    240 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    241 ; X86-NEXT:    movl (%eax), %edx
    242 ; X86-NEXT:    movl 4(%eax), %ebx
    243 ; X86-NEXT:    movl %edx, %esi
    244 ; X86-NEXT:    shll %cl, %esi
    245 ; X86-NEXT:    movl %ebx, %edi
    246 ; X86-NEXT:    shldl %cl, %edx, %edi
    247 ; X86-NEXT:    testb $32, %cl
    248 ; X86-NEXT:    je .LBB6_2
    249 ; X86-NEXT:  # %bb.1:
    250 ; X86-NEXT:    movl %esi, %edi
    251 ; X86-NEXT:    xorl %esi, %esi
    252 ; X86-NEXT:  .LBB6_2:
    253 ; X86-NEXT:    negl %ecx
    254 ; X86-NEXT:    movl %ebx, %ebp
    255 ; X86-NEXT:    shrl %cl, %ebp
    256 ; X86-NEXT:    shrdl %cl, %ebx, %edx
    257 ; X86-NEXT:    testb $32, %cl
    258 ; X86-NEXT:    je .LBB6_4
    259 ; X86-NEXT:  # %bb.3:
    260 ; X86-NEXT:    movl %ebp, %edx
    261 ; X86-NEXT:    xorl %ebp, %ebp
    262 ; X86-NEXT:  .LBB6_4:
    263 ; X86-NEXT:    orl %ebp, %edi
    264 ; X86-NEXT:    orl %edx, %esi
    265 ; X86-NEXT:    movl %edi, 4(%eax)
    266 ; X86-NEXT:    movl %esi, (%eax)
    267 ; X86-NEXT:    popl %esi
    268 ; X86-NEXT:    .cfi_def_cfa_offset 16
    269 ; X86-NEXT:    popl %edi
    270 ; X86-NEXT:    .cfi_def_cfa_offset 12
    271 ; X86-NEXT:    popl %ebx
    272 ; X86-NEXT:    .cfi_def_cfa_offset 8
    273 ; X86-NEXT:    popl %ebp
    274 ; X86-NEXT:    .cfi_def_cfa_offset 4
    275 ; X86-NEXT:    retl
    276 ;
    277 ; X64-LABEL: rotate_left_m64:
    278 ; X64:       # %bb.0:
    279 ; X64-NEXT:    movl %esi, %ecx
    280 ; X64-NEXT:    rolq %cl, (%rdi)
    281 ; X64-NEXT:    retq
; i64 rotate-left through memory: x86-64 folds the memory operand into a
; single rolq %cl, (%rdi). i686 cannot, so the CHECK lines show explicit
; 32-bit loads of both halves, the shldl/shrdl expansion with testb $32
; branches, and explicit stores of both halves.
    282   %a = load i64, i64* %pa, align 16
    283   %and = and i64 %b, 63
    284   %shl = shl i64 %a, %and
    285   %t0 = sub i64 0, %b
    286   %and3 = and i64 %t0, 63
    287   %shr = lshr i64 %a, %and3
    288   %or = or i64 %shl, %shr
    289   store i64 %or, i64* %pa, align 64
    290   ret void
    291 }
    292 
    293 define void @rotate_right_m64(i64 *%pa, i64 %b) {
    294 ; X86-LABEL: rotate_right_m64:
    295 ; X86:       # %bb.0:
    296 ; X86-NEXT:    pushl %ebp
    297 ; X86-NEXT:    .cfi_def_cfa_offset 8
    298 ; X86-NEXT:    pushl %ebx
    299 ; X86-NEXT:    .cfi_def_cfa_offset 12
    300 ; X86-NEXT:    pushl %edi
    301 ; X86-NEXT:    .cfi_def_cfa_offset 16
    302 ; X86-NEXT:    pushl %esi
    303 ; X86-NEXT:    .cfi_def_cfa_offset 20
    304 ; X86-NEXT:    .cfi_offset %esi, -20
    305 ; X86-NEXT:    .cfi_offset %edi, -16
    306 ; X86-NEXT:    .cfi_offset %ebx, -12
    307 ; X86-NEXT:    .cfi_offset %ebp, -8
    308 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    309 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    310 ; X86-NEXT:    movl (%eax), %ebx
    311 ; X86-NEXT:    movl 4(%eax), %edx
    312 ; X86-NEXT:    movl %edx, %esi
    313 ; X86-NEXT:    shrl %cl, %esi
    314 ; X86-NEXT:    movl %ebx, %edi
    315 ; X86-NEXT:    shrdl %cl, %edx, %edi
    316 ; X86-NEXT:    testb $32, %cl
    317 ; X86-NEXT:    je .LBB7_2
    318 ; X86-NEXT:  # %bb.1:
    319 ; X86-NEXT:    movl %esi, %edi
    320 ; X86-NEXT:    xorl %esi, %esi
    321 ; X86-NEXT:  .LBB7_2:
    322 ; X86-NEXT:    negl %ecx
    323 ; X86-NEXT:    movl %ebx, %ebp
    324 ; X86-NEXT:    shll %cl, %ebp
    325 ; X86-NEXT:    shldl %cl, %ebx, %edx
    326 ; X86-NEXT:    testb $32, %cl
    327 ; X86-NEXT:    je .LBB7_4
    328 ; X86-NEXT:  # %bb.3:
    329 ; X86-NEXT:    movl %ebp, %edx
    330 ; X86-NEXT:    xorl %ebp, %ebp
    331 ; X86-NEXT:  .LBB7_4:
    332 ; X86-NEXT:    orl %edx, %esi
    333 ; X86-NEXT:    orl %ebp, %edi
    334 ; X86-NEXT:    movl %esi, 4(%eax)
    335 ; X86-NEXT:    movl %edi, (%eax)
    336 ; X86-NEXT:    popl %esi
    337 ; X86-NEXT:    .cfi_def_cfa_offset 16
    338 ; X86-NEXT:    popl %edi
    339 ; X86-NEXT:    .cfi_def_cfa_offset 12
    340 ; X86-NEXT:    popl %ebx
    341 ; X86-NEXT:    .cfi_def_cfa_offset 8
    342 ; X86-NEXT:    popl %ebp
    343 ; X86-NEXT:    .cfi_def_cfa_offset 4
    344 ; X86-NEXT:    retl
    345 ;
    346 ; X64-LABEL: rotate_right_m64:
    347 ; X64:       # %bb.0:
    348 ; X64-NEXT:    movl %esi, %ecx
    349 ; X64-NEXT:    rorq %cl, (%rdi)
    350 ; X64-NEXT:    retq
; i64 rotate-right through memory: single rorq %cl, (%rdi) on x86-64; on
; i686 the mirrored shrdl-then-shldl expansion of rotate_left_m64, again
; with per-half loads/stores and testb $32 branches.
    351   %a = load i64, i64* %pa, align 16
    352   %and = and i64 %b, 63
    353   %shl = lshr i64 %a, %and
    354   %t0 = sub i64 0, %b
    355   %and3 = and i64 %t0, 63
    356   %shr = shl i64 %a, %and3
    357   %or = or i64 %shl, %shr
    358   store i64 %or, i64* %pa, align 64
    359   ret void
    360 }
    361 
    362 ; The next 8 tests include masks of the narrow width shift amounts that should be eliminated.
    363 ; These patterns are produced by instcombine after r310509.
    364 
    365 define i8 @rotate_left_8(i8 %x, i32 %amount) {
    366 ; X86-LABEL: rotate_left_8:
    367 ; X86:       # %bb.0:
    368 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    369 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    370 ; X86-NEXT:    rolb %cl, %al
    371 ; X86-NEXT:    retl
    372 ;
    373 ; X64-LABEL: rotate_left_8:
    374 ; X64:       # %bb.0:
    375 ; X64-NEXT:    movl %esi, %ecx
    376 ; X64-NEXT:    rolb %cl, %dil
    377 ; X64-NEXT:    movl %edi, %eax
    378 ; X64-NEXT:    retq
; i8 rotate-left where the amount arrives as i32 and is truncated; the
; explicit & 7 masks (instcombine's post-r310509 form) must be eliminated,
; leaving a bare rolb per the CHECK lines.
    379   %amt = trunc i32 %amount to i8
    380   %sub = sub i8 0, %amt
    381   %maskamt = and i8 %amt, 7
    382   %masksub = and i8 %sub, 7
    383   %shl = shl i8 %x, %maskamt
    384   %shr = lshr i8 %x, %masksub
    385   %or = or i8 %shl, %shr
    386   ret i8 %or
    387 }
    388 
    389 define i8 @rotate_right_8(i8 %x, i32 %amount) {
    390 ; X86-LABEL: rotate_right_8:
    391 ; X86:       # %bb.0:
    392 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    393 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    394 ; X86-NEXT:    rorb %cl, %al
    395 ; X86-NEXT:    retl
    396 ;
    397 ; X64-LABEL: rotate_right_8:
    398 ; X64:       # %bb.0:
    399 ; X64-NEXT:    movl %esi, %ecx
    400 ; X64-NEXT:    rorb %cl, %dil
    401 ; X64-NEXT:    movl %edi, %eax
    402 ; X64-NEXT:    retq
; i8 rotate-right: truncated i32 amount, redundant & 7 masks eliminated,
; single rorb selected on both targets.
    403   %amt = trunc i32 %amount to i8
    404   %sub = sub i8 0, %amt
    405   %maskamt = and i8 %amt, 7
    406   %masksub = and i8 %sub, 7
    407   %shr = lshr i8 %x, %maskamt
    408   %shl = shl i8 %x, %masksub
    409   %or = or i8 %shr, %shl
    410   ret i8 %or
    411 }
    412 
    413 define i16 @rotate_left_16(i16 %x, i32 %amount) {
    414 ; X86-LABEL: rotate_left_16:
    415 ; X86:       # %bb.0:
    416 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    417 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    418 ; X86-NEXT:    rolw %cl, %ax
    419 ; X86-NEXT:    retl
    420 ;
    421 ; X64-LABEL: rotate_left_16:
    422 ; X64:       # %bb.0:
    423 ; X64-NEXT:    movl %esi, %ecx
    424 ; X64-NEXT:    rolw %cl, %di
    425 ; X64-NEXT:    movl %edi, %eax
    426 ; X64-NEXT:    retq
; i16 rotate-left: truncated i32 amount, redundant & 15 masks eliminated,
; single rolw selected on both targets.
    427   %amt = trunc i32 %amount to i16
    428   %sub = sub i16 0, %amt
    429   %maskamt = and i16 %amt, 15
    430   %masksub = and i16 %sub, 15
    431   %shl = shl i16 %x, %maskamt
    432   %shr = lshr i16 %x, %masksub
    433   %or = or i16 %shl, %shr
    434   ret i16 %or
    435 }
    436 
    437 define i16 @rotate_right_16(i16 %x, i32 %amount) {
    438 ; X86-LABEL: rotate_right_16:
    439 ; X86:       # %bb.0:
    440 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    441 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    442 ; X86-NEXT:    rorw %cl, %ax
    443 ; X86-NEXT:    retl
    444 ;
    445 ; X64-LABEL: rotate_right_16:
    446 ; X64:       # %bb.0:
    447 ; X64-NEXT:    movl %esi, %ecx
    448 ; X64-NEXT:    rorw %cl, %di
    449 ; X64-NEXT:    movl %edi, %eax
    450 ; X64-NEXT:    retq
; i16 rotate-right: truncated i32 amount, redundant & 15 masks eliminated,
; single rorw selected on both targets.
    451   %amt = trunc i32 %amount to i16
    452   %sub = sub i16 0, %amt
    453   %maskamt = and i16 %amt, 15
    454   %masksub = and i16 %sub, 15
    455   %shr = lshr i16 %x, %maskamt
    456   %shl = shl i16 %x, %masksub
    457   %or = or i16 %shr, %shl
    458   ret i16 %or
    459 }
    460 
    461 define void @rotate_left_m8(i8* %p, i32 %amount) {
    462 ; X86-LABEL: rotate_left_m8:
    463 ; X86:       # %bb.0:
    464 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    465 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    466 ; X86-NEXT:    rolb %cl, (%eax)
    467 ; X86-NEXT:    retl
    468 ;
    469 ; X64-LABEL: rotate_left_m8:
    470 ; X64:       # %bb.0:
    471 ; X64-NEXT:    movl %esi, %ecx
    472 ; X64-NEXT:    rolb %cl, (%rdi)
    473 ; X64-NEXT:    retq
; i8 rotate-left through memory: masks eliminated and the load/store folded
; into a single rolb %cl, (mem).
    474   %x = load i8, i8* %p, align 1
    475   %amt = trunc i32 %amount to i8
    476   %sub = sub i8 0, %amt
    477   %maskamt = and i8 %amt, 7
    478   %masksub = and i8 %sub, 7
    479   %shl = shl i8 %x, %maskamt
    480   %shr = lshr i8 %x, %masksub
    481   %or = or i8 %shl, %shr
    482   store i8 %or, i8* %p, align 1
    483   ret void
    484 }
    485 
    486 define void @rotate_right_m8(i8* %p, i32 %amount) {
    487 ; X86-LABEL: rotate_right_m8:
    488 ; X86:       # %bb.0:
    489 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    490 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    491 ; X86-NEXT:    rorb %cl, (%eax)
    492 ; X86-NEXT:    retl
    493 ;
    494 ; X64-LABEL: rotate_right_m8:
    495 ; X64:       # %bb.0:
    496 ; X64-NEXT:    movl %esi, %ecx
    497 ; X64-NEXT:    rorb %cl, (%rdi)
    498 ; X64-NEXT:    retq
; i8 rotate-right through memory: here the shl uses the negated-and-masked
; amount (%masksub) and the lshr uses %maskamt, i.e. a right rotate by
; amount & 7, folded to rorb %cl, (mem).
    499   %x = load i8, i8* %p, align 1
    500   %amt = trunc i32 %amount to i8
    501   %sub = sub i8 0, %amt
    502   %maskamt = and i8 %amt, 7
    503   %masksub = and i8 %sub, 7
    504   %shl = shl i8 %x, %masksub
    505   %shr = lshr i8 %x, %maskamt
    506   %or = or i8 %shl, %shr
    507   store i8 %or, i8* %p, align 1
    508   ret void
    509 }
    510 
    511 define void @rotate_left_m16(i16* %p, i32 %amount) {
    512 ; X86-LABEL: rotate_left_m16:
    513 ; X86:       # %bb.0:
    514 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    515 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    516 ; X86-NEXT:    rolw %cl, (%eax)
    517 ; X86-NEXT:    retl
    518 ;
    519 ; X64-LABEL: rotate_left_m16:
    520 ; X64:       # %bb.0:
    521 ; X64-NEXT:    movl %esi, %ecx
    522 ; X64-NEXT:    rolw %cl, (%rdi)
    523 ; X64-NEXT:    retq
; i16 rotate-left through memory: masks eliminated and load/store folded
; into a single rolw %cl, (mem).
    524   %x = load i16, i16* %p, align 1
    525   %amt = trunc i32 %amount to i16
    526   %sub = sub i16 0, %amt
    527   %maskamt = and i16 %amt, 15
    528   %masksub = and i16 %sub, 15
    529   %shl = shl i16 %x, %maskamt
    530   %shr = lshr i16 %x, %masksub
    531   %or = or i16 %shl, %shr
    532   store i16 %or, i16* %p, align 1
    533   ret void
    534 }
    535 
    536 define void @rotate_right_m16(i16* %p, i32 %amount) {
    537 ; X86-LABEL: rotate_right_m16:
    538 ; X86:       # %bb.0:
    539 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    540 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    541 ; X86-NEXT:    rorw %cl, (%eax)
    542 ; X86-NEXT:    retl
    543 ;
    544 ; X64-LABEL: rotate_right_m16:
    545 ; X64:       # %bb.0:
    546 ; X64-NEXT:    movl %esi, %ecx
    547 ; X64-NEXT:    rorw %cl, (%rdi)
    548 ; X64-NEXT:    retq
; i16 rotate-right through memory (shl by %masksub, lshr by %maskamt):
; folds to a single rorw %cl, (mem) on both targets.
    549   %x = load i16, i16* %p, align 1
    550   %amt = trunc i32 %amount to i16
    551   %sub = sub i16 0, %amt
    552   %maskamt = and i16 %amt, 15
    553   %masksub = and i16 %sub, 15
    554   %shl = shl i16 %x, %masksub
    555   %shr = lshr i16 %x, %maskamt
    556   %or = or i16 %shl, %shr
    557   store i16 %or, i16* %p, align 1
    558   ret void
    559 }
    560 
    561 define i32 @rotate_demanded_bits(i32, i32) {
    562 ; X86-LABEL: rotate_demanded_bits:
    563 ; X86:       # %bb.0:
    564 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    565 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    566 ; X86-NEXT:    andb $30, %cl
    567 ; X86-NEXT:    roll %cl, %eax
    568 ; X86-NEXT:    retl
    569 ;
    570 ; X64-LABEL: rotate_demanded_bits:
    571 ; X64:       # %bb.0:
    572 ; X64-NEXT:    andb $30, %sil
    573 ; X64-NEXT:    movl %esi, %ecx
    574 ; X64-NEXT:    roll %cl, %eax # encoding shown by CHECKs below differs; see autogen
    575 ; X64-NEXT:    retq
; Both amounts are masked with 30 (not 31): still a rotate by (%1 & 30),
; since negating an even amount keeps the two shift counts complementary
; mod 32. Per the CHECKs, only a single andb $30 survives before roll.
    576   %3 = and i32 %1, 30
    577   %4 = shl i32 %0, %3
    578   %5 = sub nsw i32 0, %3
    579   %6 = and i32 %5, 30
    580   %7 = lshr i32 %0, %6
    581   %8 = or i32 %7, %4
    582   ret i32 %8
    583 }
    585 
    586 define i32 @rotate_demanded_bits_2(i32, i32) {
    587 ; X86-LABEL: rotate_demanded_bits_2:
    588 ; X86:       # %bb.0:
    589 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    590 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    591 ; X86-NEXT:    andb $23, %cl
    592 ; X86-NEXT:    roll %cl, %eax
    593 ; X86-NEXT:    retl
    594 ;
    595 ; X64-LABEL: rotate_demanded_bits_2:
    596 ; X64:       # %bb.0:
    597 ; X64-NEXT:    andb $23, %sil
    598 ; X64-NEXT:    movl %esi, %ecx
    599 ; X64-NEXT:    roll %cl, %edi
    600 ; X64-NEXT:    movl %edi, %eax
    601 ; X64-NEXT:    retq
; Left amount is %1 & 23; right amount is (-(%1 & 23)) & 31 — complementary
; mod 32, so this is a rotate-left by (%1 & 23). CHECKs keep the andb $23
; and select a single roll.
    602   %3 = and i32 %1, 23
    603   %4 = shl i32 %0, %3
    604   %5 = sub nsw i32 0, %3
    605   %6 = and i32 %5, 31
    606   %7 = lshr i32 %0, %6
    607   %8 = or i32 %7, %4
    608   ret i32 %8
    609 }
    610 
    611 define i32 @rotate_demanded_bits_3(i32, i32) {
    612 ; X86-LABEL: rotate_demanded_bits_3:
    613 ; X86:       # %bb.0:
    614 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    615 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    616 ; X86-NEXT:    addb %cl, %cl
    617 ; X86-NEXT:    andb $30, %cl
    618 ; X86-NEXT:    roll %cl, %eax
    619 ; X86-NEXT:    retl
    620 ;
    621 ; X64-LABEL: rotate_demanded_bits_3:
    622 ; X64:       # %bb.0:
    623 ; X64-NEXT:    addb %sil, %sil
    624 ; X64-NEXT:    andb $30, %sil
    625 ; X64-NEXT:    movl %esi, %ecx
    626 ; X64-NEXT:    roll %cl, %edi
    627 ; X64-NEXT:    movl %edi, %eax
    628 ; X64-NEXT:    retq
; Amount is (%1 << 1) & 30; the right amount negates the *unmasked* %1 << 1
; and then masks with 30 — still complementary mod 32 because the amount is
; even. CHECKs show addb (the <<1) + andb $30 feeding a single roll.
    629   %3 = shl i32 %1, 1
    630   %4 = and i32 %3, 30
    631   %5 = shl i32 %0, %4
    632   %6 = sub i32 0, %3
    633   %7 = and i32 %6, 30
    634   %8 = lshr i32 %0, %7
    635   %9 = or i32 %5, %8
    636   ret i32 %9
    637 }
    638