; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686--   -mattr=sse2 | FileCheck %s --check-prefixes=ANY,X32-SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=ANY,X64-AVX2

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables - x86 has 'shld', but it is not matched for a variable shift amount.
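; A rough sketch of the expansion checked below, in IR-flavored pseudocode
; (the value names are illustrative, not from the test). The machine shifts
; implicitly take the amount mod 32, which plain IR shifts do not, so a
; final select covers the zero-amount case where the inverse shift amount
; would be out of range:
;
;   %zm  = and i32 %z, 31
;   %inv = sub i32 32, %z           ; shrl only reads the low 5 bits of %cl
;   %lo  = lshr i32 %y, %inv
;   %hi  = shl i32 %x, %zm
;   %or  = or i32 %hi, %lo
;   %c   = icmp eq i32 %zm, 0
;   %f   = select i1 %c, i32 %x, i32 %or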

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X32-SSE2-LABEL: fshl_i32:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    pushl %edi
; X32-SSE2-NEXT:    pushl %esi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    movl $32, %ecx
; X32-SSE2-NEXT:    subl %edx, %ecx
; X32-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X32-SSE2-NEXT:    shrl %cl, %edi
; X32-SSE2-NEXT:    andl $31, %edx
; X32-SSE2-NEXT:    movl %esi, %eax
; X32-SSE2-NEXT:    movl %edx, %ecx
; X32-SSE2-NEXT:    shll %cl, %eax
; X32-SSE2-NEXT:    orl %edi, %eax
; X32-SSE2-NEXT:    testl %edx, %edx
; X32-SSE2-NEXT:    cmovel %esi, %eax
; X32-SSE2-NEXT:    popl %esi
; X32-SSE2-NEXT:    popl %edi
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl $32, %ecx
; X64-AVX2-NEXT:    subl %edx, %ecx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrl %cl, %esi
; X64-AVX2-NEXT:    andl $31, %edx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    shll %cl, %eax
; X64-AVX2-NEXT:    orl %esi, %eax
; X64-AVX2-NEXT:    testl %edx, %edx
; X64-AVX2-NEXT:    cmovel %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
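; An observation about the output below (not a requirement of the test):
; the i37 shift amount must first be reduced mod 37. On i686 that is done
; with a __umoddi3 libcall; on x86-64 it is done inline with a
; multiply-by-reciprocal sequence, where the two 'leaq' steps rebuild
; 37 * quotient as q + 4 * (q + 8 * q) before the subtract.
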
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X32-SSE2-LABEL: fshl_i37:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    pushl %ebp
; X32-SSE2-NEXT:    pushl %ebx
; X32-SSE2-NEXT:    pushl %edi
; X32-SSE2-NEXT:    pushl %esi
; X32-SSE2-NEXT:    subl $8, %esp
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT:    andl $31, %esi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    movl %eax, %ebp
; X32-SSE2-NEXT:    andl $31, %ebp
; X32-SSE2-NEXT:    movl $37, %ecx
; X32-SSE2-NEXT:    subl %ebx, %ecx
; X32-SSE2-NEXT:    movl $0, %edx
; X32-SSE2-NEXT:    sbbl %eax, %edx
; X32-SSE2-NEXT:    andl $31, %edx
; X32-SSE2-NEXT:    pushl $0
; X32-SSE2-NEXT:    pushl $37
; X32-SSE2-NEXT:    pushl %edx
; X32-SSE2-NEXT:    pushl %ecx
; X32-SSE2-NEXT:    calll __umoddi3
; X32-SSE2-NEXT:    addl $16, %esp
; X32-SSE2-NEXT:    movl %eax, (%esp) # 4-byte Spill
; X32-SSE2-NEXT:    movl %eax, %ecx
; X32-SSE2-NEXT:    shrdl %cl, %esi, %edi
; X32-SSE2-NEXT:    pushl $0
; X32-SSE2-NEXT:    pushl $37
; X32-SSE2-NEXT:    pushl %ebp
; X32-SSE2-NEXT:    pushl %ebx
; X32-SSE2-NEXT:    calll __umoddi3
; X32-SSE2-NEXT:    addl $16, %esp
; X32-SSE2-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    movl %edx, %ebp
; X32-SSE2-NEXT:    movl %eax, %ecx
; X32-SSE2-NEXT:    shll %cl, %ebp
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-SSE2-NEXT:    shldl %cl, %edx, %ebx
; X32-SSE2-NEXT:    testb $32, %al
; X32-SSE2-NEXT:    cmovnel %ebp, %ebx
; X32-SSE2-NEXT:    movl $0, %edx
; X32-SSE2-NEXT:    cmovnel %edx, %ebp
; X32-SSE2-NEXT:    movl (%esp), %ecx # 4-byte Reload
; X32-SSE2-NEXT:    shrl %cl, %esi
; X32-SSE2-NEXT:    testb $32, %cl
; X32-SSE2-NEXT:    cmovnel %esi, %edi
; X32-SSE2-NEXT:    cmovnel %edx, %esi
; X32-SSE2-NEXT:    orl %ebx, %esi
; X32-SSE2-NEXT:    orl %ebp, %edi
; X32-SSE2-NEXT:    orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %edi
; X32-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT:    movl %edi, %eax
; X32-SSE2-NEXT:    movl %esi, %edx
; X32-SSE2-NEXT:    addl $8, %esp
; X32-SSE2-NEXT:    popl %esi
; X32-SSE2-NEXT:    popl %edi
; X32-SSE2-NEXT:    popl %ebx
; X32-SSE2-NEXT:    popl %ebp
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i37:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    pushq %rbx
; X64-AVX2-NEXT:    movq %rdx, %r10
; X64-AVX2-NEXT:    movabsq $137438953471, %r8 # imm = 0x1FFFFFFFFF
; X64-AVX2-NEXT:    andq %r8, %rsi
; X64-AVX2-NEXT:    movl $37, %r9d
; X64-AVX2-NEXT:    subq %rdx, %r9
; X64-AVX2-NEXT:    andq %r8, %r10
; X64-AVX2-NEXT:    movabsq $-2492803253203993461, %r11 # imm = 0xDD67C8A60DD67C8B
; X64-AVX2-NEXT:    movq %r10, %rax
; X64-AVX2-NEXT:    mulq %r11
; X64-AVX2-NEXT:    shrq $5, %rdx
; X64-AVX2-NEXT:    leaq (%rdx,%rdx,8), %rax
; X64-AVX2-NEXT:    leaq (%rdx,%rax,4), %rax
; X64-AVX2-NEXT:    subq %rax, %r10
; X64-AVX2-NEXT:    movq %rdi, %rbx
; X64-AVX2-NEXT:    movl %r10d, %ecx
; X64-AVX2-NEXT:    shlq %cl, %rbx
; X64-AVX2-NEXT:    andq %r9, %r8
; X64-AVX2-NEXT:    movq %r8, %rax
; X64-AVX2-NEXT:    mulq %r11
; X64-AVX2-NEXT:    shrq $5, %rdx
; X64-AVX2-NEXT:    leaq (%rdx,%rdx,8), %rax
; X64-AVX2-NEXT:    leal (%rdx,%rax,4), %eax
; X64-AVX2-NEXT:    subl %eax, %r9d
; X64-AVX2-NEXT:    movl %r9d, %ecx
; X64-AVX2-NEXT:    shrq %cl, %rsi
; X64-AVX2-NEXT:    orq %rbx, %rsi
; X64-AVX2-NEXT:    testq %r10, %r10
; X64-AVX2-NEXT:    cmoveq %rdi, %rsi
; X64-AVX2-NEXT:    movq %rsi, %rax
; X64-AVX2-NEXT:    popq %rbx
; X64-AVX2-NEXT:    retq
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
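; Worked out: fshl(112, 127, 2) on i7 = ((112 << 2) | (127 >> (7 - 2))) & 127
;           = (448 & 127) | 3 = 64 | 3 = 67 = 0b1000011.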
define i7 @fshl_i7_const_fold() {
; ANY-LABEL: fshl_i7_const_fold:
; ANY:       # %bb.0:
; ANY-NEXT:    movb $67, %al
; ANY-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With a constant shift amount, this is 'shld' with a constant operand.
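; For reference: 'shldl $9, %ecx, %eax' computes
; eax = (eax << 9) | (ecx >> 23), which is exactly fshl(%eax, %ecx, 9).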

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshl_i32_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    shldl $9, %ecx, %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    shldl $9, %esi, %edi
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9, so this matches the constant-shift test above.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshl_i32_const_overshift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    shldl $9, %ecx, %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    shldl $9, %esi, %edi
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64=41.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X32-SSE2-LABEL: fshl_i64_const_overshift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    shldl $9, %ecx, %edx
; X32-SSE2-NEXT:    shrdl $23, %ecx, %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i64_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    shldq $41, %rsi, %rdi
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
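; Worked out: fshl(255, 0, 7) on i8 = ((255 << 7) | (0 >> 1)) & 255
;           = 128, i.e. -128 as a signed byte.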

define i8 @fshl_i8_const_fold() nounwind {
; ANY-LABEL: fshl_i8_const_fold:
; ANY:       # %bb.0:
; ANY-NEXT:    movb $-128, %al
; ANY-NEXT:    ret{{[l|q]}}
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables - x86 has 'shrd', but it is not matched for a variable shift amount.
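; A rough sketch of the expansion checked below, mirroring the fshl case
; above with the roles of the two inputs swapped (names are illustrative);
; here the select for the zero-amount case picks %y:
;
;   %zm  = and i32 %z, 31
;   %inv = sub i32 32, %z
;   %lo  = lshr i32 %y, %zm
;   %hi  = shl i32 %x, %inv
;   %or  = or i32 %hi, %lo
;   %c   = icmp eq i32 %zm, 0
;   %f   = select i1 %c, i32 %y, i32 %or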

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X32-SSE2-LABEL: fshr_i32:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    pushl %ebx
; X32-SSE2-NEXT:    pushl %edi
; X32-SSE2-NEXT:    pushl %esi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    movl $32, %ebx
; X32-SSE2-NEXT:    subl %edx, %ebx
; X32-SSE2-NEXT:    andl $31, %edx
; X32-SSE2-NEXT:    movl %esi, %edi
; X32-SSE2-NEXT:    movl %edx, %ecx
; X32-SSE2-NEXT:    shrl %cl, %edi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    movl %ebx, %ecx
; X32-SSE2-NEXT:    shll %cl, %eax
; X32-SSE2-NEXT:    orl %edi, %eax
; X32-SSE2-NEXT:    testl %edx, %edx
; X32-SSE2-NEXT:    cmovel %esi, %eax
; X32-SSE2-NEXT:    popl %esi
; X32-SSE2-NEXT:    popl %edi
; X32-SSE2-NEXT:    popl %ebx
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl $32, %r8d
; X64-AVX2-NEXT:    subl %edx, %r8d
; X64-AVX2-NEXT:    andl $31, %edx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    shrl %cl, %eax
; X64-AVX2-NEXT:    movl %r8d, %ecx
; X64-AVX2-NEXT:    shll %cl, %edi
; X64-AVX2-NEXT:    orl %eax, %edi
; X64-AVX2-NEXT:    testl %edx, %edx
; X64-AVX2-NEXT:    cmovel %esi, %edi
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
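; As in fshl_i37 above, the shift amount is first reduced mod 37: a
; __umoddi3 libcall on i686, an inline multiply-by-reciprocal on x86-64.
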
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X32-SSE2-LABEL: fshr_i37:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    pushl %ebp
; X32-SSE2-NEXT:    pushl %ebx
; X32-SSE2-NEXT:    pushl %edi
; X32-SSE2-NEXT:    pushl %esi
; X32-SSE2-NEXT:    pushl %eax
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT:    andl $31, %esi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    andl $31, %eax
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-SSE2-NEXT:    pushl $0
; X32-SSE2-NEXT:    pushl $37
; X32-SSE2-NEXT:    pushl %eax
; X32-SSE2-NEXT:    pushl %ebp
; X32-SSE2-NEXT:    calll __umoddi3
; X32-SSE2-NEXT:    addl $16, %esp
; X32-SSE2-NEXT:    movl %eax, %ebx
; X32-SSE2-NEXT:    movl %edx, (%esp) # 4-byte Spill
; X32-SSE2-NEXT:    movl $37, %eax
; X32-SSE2-NEXT:    subl %ebp, %eax
; X32-SSE2-NEXT:    movl $0, %edx
; X32-SSE2-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    andl $31, %edx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X32-SSE2-NEXT:    movl %ebx, %ecx
; X32-SSE2-NEXT:    shrdl %cl, %esi, %ebp
; X32-SSE2-NEXT:    pushl $0
; X32-SSE2-NEXT:    pushl $37
; X32-SSE2-NEXT:    pushl %edx
; X32-SSE2-NEXT:    pushl %eax
; X32-SSE2-NEXT:    calll __umoddi3
; X32-SSE2-NEXT:    addl $16, %esp
; X32-SSE2-NEXT:    movl %eax, %ecx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    movl %edx, %eax
; X32-SSE2-NEXT:    shll %cl, %eax
; X32-SSE2-NEXT:    shldl %cl, %edx, %edi
; X32-SSE2-NEXT:    testb $32, %cl
; X32-SSE2-NEXT:    cmovnel %eax, %edi
; X32-SSE2-NEXT:    movl $0, %edx
; X32-SSE2-NEXT:    cmovnel %edx, %eax
; X32-SSE2-NEXT:    movl %ebx, %ecx
; X32-SSE2-NEXT:    shrl %cl, %esi
; X32-SSE2-NEXT:    testb $32, %bl
; X32-SSE2-NEXT:    cmovnel %esi, %ebp
; X32-SSE2-NEXT:    cmovnel %edx, %esi
; X32-SSE2-NEXT:    orl %edi, %esi
; X32-SSE2-NEXT:    orl %eax, %ebp
; X32-SSE2-NEXT:    orl %ebx, (%esp) # 4-byte Folded Spill
; X32-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %ebp
; X32-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT:    movl %ebp, %eax
; X32-SSE2-NEXT:    movl %esi, %edx
; X32-SSE2-NEXT:    addl $4, %esp
; X32-SSE2-NEXT:    popl %esi
; X32-SSE2-NEXT:    popl %edi
; X32-SSE2-NEXT:    popl %ebx
; X32-SSE2-NEXT:    popl %ebp
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i37:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    pushq %rbx
; X64-AVX2-NEXT:    movq %rdx, %r10
; X64-AVX2-NEXT:    movabsq $137438953471, %r8 # imm = 0x1FFFFFFFFF
; X64-AVX2-NEXT:    movq %rsi, %r11
; X64-AVX2-NEXT:    andq %r8, %r11
; X64-AVX2-NEXT:    movl $37, %r9d
; X64-AVX2-NEXT:    subq %rdx, %r9
; X64-AVX2-NEXT:    andq %r8, %r10
; X64-AVX2-NEXT:    movabsq $-2492803253203993461, %rbx # imm = 0xDD67C8A60DD67C8B
; X64-AVX2-NEXT:    movq %r10, %rax
; X64-AVX2-NEXT:    mulq %rbx
; X64-AVX2-NEXT:    shrq $5, %rdx
; X64-AVX2-NEXT:    leaq (%rdx,%rdx,8), %rax
; X64-AVX2-NEXT:    leaq (%rdx,%rax,4), %rax
; X64-AVX2-NEXT:    subq %rax, %r10
; X64-AVX2-NEXT:    movl %r10d, %ecx
; X64-AVX2-NEXT:    shrq %cl, %r11
; X64-AVX2-NEXT:    andq %r9, %r8
; X64-AVX2-NEXT:    movq %r8, %rax
; X64-AVX2-NEXT:    mulq %rbx
; X64-AVX2-NEXT:    shrq $5, %rdx
; X64-AVX2-NEXT:    leaq (%rdx,%rdx,8), %rax
; X64-AVX2-NEXT:    leal (%rdx,%rax,4), %eax
; X64-AVX2-NEXT:    subl %eax, %r9d
; X64-AVX2-NEXT:    movl %r9d, %ecx
; X64-AVX2-NEXT:    shlq %cl, %rdi
; X64-AVX2-NEXT:    orq %r11, %rdi
; X64-AVX2-NEXT:    testq %r10, %r10
; X64-AVX2-NEXT:    cmoveq %rsi, %rdi
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    popq %rbx
; X64-AVX2-NEXT:    retq
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
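; Worked out: fshr(112, 127, 2) on i7 = ((112 << (7 - 2)) | (127 >> 2)) & 127
;           = 0 | 31 = 31 = 0b0011111.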
define i7 @fshr_i7_const_fold() nounwind {
; ANY-LABEL: fshr_i7_const_fold:
; ANY:       # %bb.0:
; ANY-NEXT:    movb $31, %al
; ANY-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With a constant shift amount, this becomes 'shrd' or 'shld'.
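; Here fshr(x, y, 9) is rewritten as fshl(x, y, 32 - 9), so the expected
; output is 'shldl $23' rather than 'shrdl $9'; the two are equivalent.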

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshr_i32_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    shldl $23, %ecx, %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    shldl $23, %esi, %edi
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9, but right-shift became left, so 32-9=23.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshr_i32_const_overshift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    shldl $23, %ecx, %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    shldl $23, %esi, %edi
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64=41, but right-shift became left, so 64-41=23.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X32-SSE2-LABEL: fshr_i64_const_overshift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X32-SSE2-NEXT:    shldl $23, %ecx, %edx
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i64_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    shldq $23, %rsi, %rdi
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
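; Worked out: fshr(255, 0, 7) on i8 = ((255 << 1) | (0 >> 7)) & 255
;           = 510 & 255 = 254, i.e. -2 as a signed byte.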

define i8 @fshr_i8_const_fold() nounwind {
; ANY-LABEL: fshr_i8_const_fold:
; ANY:       # %bb.0:
; ANY-NEXT:    movb $-2, %al
; ANY-NEXT:    ret{{[l|q]}}
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

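; A shift amount equal to the bit width is reduced mod the width, i.e. to
; zero, so these should fold to returning %x for fshl and %y for fshr,
; for scalars and vectors alike.
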
define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshl_i32_shift_by_bitwidth:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshr_i32_shift_by_bitwidth:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
; ANY-LABEL: fshl_v4i32_shift_by_bitwidth:
; ANY:       # %bb.0:
; ANY-NEXT:    ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
; X32-SSE2-LABEL: fshr_v4i32_shift_by_bitwidth:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movaps %xmm1, %xmm0
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_v4i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovaps %xmm1, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}