; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686--   -mattr=sse2 | FileCheck %s --check-prefixes=ANY,X32-SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=ANY,X64-AVX2

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When the first two operands match, a funnel shift is a rotate.
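; Per the LangRef, for an n-bit type fshl takes the concatenation a:b (with a
; in the high bits), shifts it left by z modulo n, and returns the high n bits,
; so fshl(x, x, z) is exactly a rotate left. As a long-hand sketch, a rotate of
; an i32 by a constant 3 is:
;   %hi = shl i32 %x, 3
;   %lo = lshr i32 %x, 29
;   %r  = or i32 %hi, %lo
; but the intrinsic also stays well-defined for a variable amount of 0, where
; the (32 - z)-bit shift in the expanded form would be poison.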

define i8 @rotl_i8_const_shift(i8 %x) nounwind {
; X32-SSE2-LABEL: rotl_i8_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT:    rolb $3, %al
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i8_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    rolb $3, %dil
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i64 @rotl_i64_const_shift(i64 %x) nounwind {
; X32-SSE2-LABEL: rotl_i64_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    movl %ecx, %eax
; X32-SSE2-NEXT:    shldl $3, %edx, %eax
; X32-SSE2-NEXT:    shldl $3, %ecx, %edx
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i64_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    rolq $3, %rdi
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

define i16 @rotl_i16(i16 %x, i16 %z) nounwind {
; X32-SSE2-LABEL: rotl_i16:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    rolw %cl, %ax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    rolw %cl, %di
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotl_i32(i32 %x, i32 %z) nounwind {
; X32-SSE2-LABEL: rotl_i32:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    roll %cl, %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    roll %cl, %edi
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; Vector rotate.
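; SSE2 has no per-element variable shifts, so the right-shift half (by
; (32 - z) & 31) is done one element at a time (pshuflw/psrld/punpck), and the
; left-shift half uses the float-exponent trick: (amt << 23) + 0x3f800000
; reinterpreted as a float is 2.0^amt, so cvttps2dq followed by pmuludq
; multiplies each lane by 2^amt. AVX2 can instead use the per-element
; variable-shift instructions vpsllvd/vpsrlvd directly.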

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X32-SSE2-LABEL: rotl_v4i32:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32]
; X32-SSE2-NEXT:    psubd %xmm1, %xmm3
; X32-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [31,31,31,31]
; X32-SSE2-NEXT:    pand %xmm4, %xmm3
; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm3[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm5
; X32-SSE2-NEXT:    psrld %xmm2, %xmm5
; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm6 = xmm3[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X32-SSE2-NEXT:    psrld %xmm6, %xmm2
; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm5 = xmm3[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm6
; X32-SSE2-NEXT:    psrld %xmm5, %xmm6
; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm5
; X32-SSE2-NEXT:    psrld %xmm3, %xmm5
; X32-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
; X32-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm5[0,3]
; X32-SSE2-NEXT:    pand %xmm4, %xmm1
; X32-SSE2-NEXT:    pslld $23, %xmm1
; X32-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
; X32-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X32-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X32-SSE2-NEXT:    pmuludq %xmm3, %xmm1
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT:    orps %xmm0, %xmm2
; X32-SSE2-NEXT:    movaps %xmm2, %xmm0
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_v4i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X64-AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
; X64-AVX2-NEXT:    vpsllvd %xmm3, %xmm0, %xmm3
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32]
; X64-AVX2-NEXT:    vpsubd %xmm1, %xmm4, %xmm1
; X64-AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpor %xmm0, %xmm3, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.
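; With a uniform constant amount none of the per-element machinery is needed:
; on both targets a rotate left by 3 is just (x << 3) | (x >> 29), i.e. two
; immediate shifts and an OR.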

define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) nounwind {
; X32-SSE2-LABEL: rotl_v4i32_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X32-SSE2-NEXT:    psrld $29, %xmm1
; X32-SSE2-NEXT:    pslld $3, %xmm0
; X32-SSE2-NEXT:    por %xmm1, %xmm0
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_v4i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrld $29, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpslld $3, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.
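; fshr keeps the low n bits of the shifted concatenation instead:
;   fshr(a, b, z) = low n bits of ((a:b) >> (z mod n))
; so fshr(x, x, z) is a rotate right.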

define i8 @rotr_i8_const_shift(i8 %x) nounwind {
; X32-SSE2-LABEL: rotr_i8_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT:    rorb $3, %al
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i8_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    rorb $3, %dil
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i32 @rotr_i32_const_shift(i32 %x) nounwind {
; X32-SSE2-LABEL: rotr_i32_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    rorl $3, %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    rorl $3, %edi
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When the first two operands match, it's a rotate (by a variable amount).
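; Note: x86-32 has no 64-bit rotate instruction, so the i64 case below expands
; into double-shifts (shrdl/shldl) plus cmovs on the "testb $32" bit to handle
; masked amounts of 32 or more, while x86-64 lowers straight to rorq.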

define i16 @rotr_i16(i16 %x, i16 %z) nounwind {
; X32-SSE2-LABEL: rotr_i16:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    rorw %cl, %ax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    rorw %cl, %di
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i64 @rotr_i64(i64 %x, i64 %z) nounwind {
; X32-SSE2-LABEL: rotr_i64:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    pushl %ebp
; X32-SSE2-NEXT:    pushl %ebx
; X32-SSE2-NEXT:    pushl %edi
; X32-SSE2-NEXT:    pushl %esi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    movl %eax, %ecx
; X32-SSE2-NEXT:    andl $63, %ecx
; X32-SSE2-NEXT:    movl %edx, %edi
; X32-SSE2-NEXT:    shrl %cl, %edi
; X32-SSE2-NEXT:    movl %esi, %ebx
; X32-SSE2-NEXT:    shrdl %cl, %edx, %ebx
; X32-SSE2-NEXT:    xorl %ebp, %ebp
; X32-SSE2-NEXT:    testb $32, %cl
; X32-SSE2-NEXT:    cmovnel %edi, %ebx
; X32-SSE2-NEXT:    cmovnel %ebp, %edi
; X32-SSE2-NEXT:    movl $64, %ecx
; X32-SSE2-NEXT:    subl %eax, %ecx
; X32-SSE2-NEXT:    andl $63, %ecx
; X32-SSE2-NEXT:    movl %esi, %eax
; X32-SSE2-NEXT:    shll %cl, %eax
; X32-SSE2-NEXT:    shldl %cl, %esi, %edx
; X32-SSE2-NEXT:    testb $32, %cl
; X32-SSE2-NEXT:    cmovnel %eax, %edx
; X32-SSE2-NEXT:    cmovnel %ebp, %eax
; X32-SSE2-NEXT:    orl %ebx, %eax
; X32-SSE2-NEXT:    orl %edi, %edx
; X32-SSE2-NEXT:    popl %esi
; X32-SSE2-NEXT:    popl %edi
; X32-SSE2-NEXT:    popl %ebx
; X32-SSE2-NEXT:    popl %ebp
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    rorq %cl, %rdi
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.
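; Same expansion as rotl_v4i32, with the variable left- and right-shift halves
; exchanged.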

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X32-SSE2-LABEL: rotr_v4i32:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [31,31,31,31]
; X32-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32]
; X32-SSE2-NEXT:    psubd %xmm1, %xmm3
; X32-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X32-SSE2-NEXT:    pand %xmm2, %xmm4
; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm5
; X32-SSE2-NEXT:    psrld %xmm1, %xmm5
; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm6 = xmm4[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X32-SSE2-NEXT:    psrld %xmm6, %xmm1
; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm6
; X32-SSE2-NEXT:    psrld %xmm5, %xmm6
; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm5
; X32-SSE2-NEXT:    psrld %xmm4, %xmm5
; X32-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
; X32-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],xmm5[0,3]
; X32-SSE2-NEXT:    pand %xmm2, %xmm3
; X32-SSE2-NEXT:    pslld $23, %xmm3
; X32-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm3
; X32-SSE2-NEXT:    cvttps2dq %xmm3, %xmm2
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X32-SSE2-NEXT:    pmuludq %xmm2, %xmm0
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X32-SSE2-NEXT:    pmuludq %xmm3, %xmm2
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT:    orps %xmm0, %xmm1
; X32-SSE2-NEXT:    movaps %xmm1, %xmm0
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_v4i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X64-AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
; X64-AVX2-NEXT:    vpsrlvd %xmm3, %xmm0, %xmm3
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32]
; X64-AVX2-NEXT:    vpsubd %xmm1, %xmm4, %xmm1
; X64-AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpor %xmm3, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.
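; Mirror image of rotl_v4i32_const_shift: a rotate right by 3 is emitted as
; (x >> 3) | (x << 29).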

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) nounwind {
; X32-SSE2-LABEL: rotr_v4i32_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X32-SSE2-NEXT:    psrld $3, %xmm1
; X32-SSE2-NEXT:    pslld $29, %xmm0
; X32-SSE2-NEXT:    por %xmm1, %xmm0
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_v4i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrld $3, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpslld $29, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

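; The shift amount is taken modulo the bitwidth, so rotating by exactly the
; bitwidth is the identity: the scalar cases fold to the argument-to-return
; register move required by the calling convention, and the vector cases fold
; away entirely since the value is already in %xmm0.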
define i32 @rotl_i32_shift_by_bitwidth(i32 %x) nounwind {
; X32-SSE2-LABEL: rotl_i32_shift_by_bitwidth:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) nounwind {
; X32-SSE2-LABEL: rotr_i32_shift_by_bitwidth:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) nounwind {
; ANY-LABEL: rotl_v4i32_shift_by_bitwidth:
; ANY:       # %bb.0:
; ANY-NEXT:    ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) nounwind {
; ANY-LABEL: rotr_v4i32_shift_by_bitwidth:
; ANY:       # %bb.0:
; ANY-NEXT:    ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}