Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
      4 
      5 ; When loading the shift amount from memory, avoid generating the splat.
      6 
      7 define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind { ; stores to %dst the vector %val shifted left by the i32 loaded from %pamt (same count in every lane)
      8 ; X32-LABEL: shift5a:
      9 ; X32:       # %bb.0: # %entry
     10 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     11 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     12 ; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
     13 ; X32-NEXT:    pslld %xmm1, %xmm0
     14 ; X32-NEXT:    movdqa %xmm0, (%eax)
     15 ; X32-NEXT:    retl
     16 ;
     17 ; X64-LABEL: shift5a:
     18 ; X64:       # %bb.0: # %entry
     19 ; X64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
     20 ; X64-NEXT:    pslld %xmm1, %xmm0
     21 ; X64-NEXT:    movdqa %xmm0, (%rdi)
     22 ; X64-NEXT:    retq
     23 entry:
     24   %amt = load i32, i32* %pamt ; scalar shift count is read from memory
     25   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 ; count placed in lane 0 only
     26   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask copies lane 0 into all four lanes (the splat)
     27   %shl = shl <4 x i32> %val, %shamt ; assertions above expect movd from memory followed directly by pslld, with no separate splat instruction
     28   store <4 x i32> %shl, <4 x i32>* %dst
     29   ret void
     30 }
     31 
     32 
     33 define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind { ; stores to %dst the vector %val arithmetically shifted right by the i32 loaded from %pamt (same count in every lane)
     34 ; X32-LABEL: shift5b:
     35 ; X32:       # %bb.0: # %entry
     36 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     37 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     38 ; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
     39 ; X32-NEXT:    psrad %xmm1, %xmm0
     40 ; X32-NEXT:    movdqa %xmm0, (%eax)
     41 ; X32-NEXT:    retl
     42 ;
     43 ; X64-LABEL: shift5b:
     44 ; X64:       # %bb.0: # %entry
     45 ; X64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
     46 ; X64-NEXT:    psrad %xmm1, %xmm0
     47 ; X64-NEXT:    movdqa %xmm0, (%rdi)
     48 ; X64-NEXT:    retq
     49 entry:
     50   %amt = load i32, i32* %pamt ; scalar shift count is read from memory
     51   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 ; count placed in lane 0 only
     52   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask copies lane 0 into all four lanes (the splat)
     53   %shr = ashr <4 x i32> %val, %shamt ; assertions above expect movd from memory followed directly by psrad, with no separate splat instruction
     54   store <4 x i32> %shr, <4 x i32>* %dst
     55   ret void
     56 }
     57 
     58 
     59 define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { ; stores to %dst the vector %val shifted left by the i32 argument %amt (same count in every lane)
     60 ; X32-LABEL: shift5c:
     61 ; X32:       # %bb.0: # %entry
     62 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     63 ; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
     64 ; X32-NEXT:    pslld %xmm1, %xmm0
     65 ; X32-NEXT:    movdqa %xmm0, (%eax)
     66 ; X32-NEXT:    retl
     67 ;
     68 ; X64-LABEL: shift5c:
     69 ; X64:       # %bb.0: # %entry
     70 ; X64-NEXT:    movd %esi, %xmm1
     71 ; X64-NEXT:    pslld %xmm1, %xmm0
     72 ; X64-NEXT:    movdqa %xmm0, (%rdi)
     73 ; X64-NEXT:    retq
     74 entry:
     75   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 ; count is passed by value here (no load in the IR) and placed in lane 0 only
     76   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask copies lane 0 into all four lanes (the splat)
     77   %shl = shl <4 x i32> %val, %shamt ; assertions above expect a single movd (from %esi on x86-64, from memory on i686) then pslld, with no separate splat instruction
     78   store <4 x i32> %shl, <4 x i32>* %dst
     79   ret void
     80 }
     81 
     82 
     83 define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { ; stores to %dst the vector %val arithmetically shifted right by the i32 argument %amt (same count in every lane)
     84 ; X32-LABEL: shift5d:
     85 ; X32:       # %bb.0: # %entry
     86 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     87 ; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
     88 ; X32-NEXT:    psrad %xmm1, %xmm0
     89 ; X32-NEXT:    movdqa %xmm0, (%eax)
     90 ; X32-NEXT:    retl
     91 ;
     92 ; X64-LABEL: shift5d:
     93 ; X64:       # %bb.0: # %entry
     94 ; X64-NEXT:    movd %esi, %xmm1
     95 ; X64-NEXT:    psrad %xmm1, %xmm0
     96 ; X64-NEXT:    movdqa %xmm0, (%rdi)
     97 ; X64-NEXT:    retq
     98 entry:
     99   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 ; count is passed by value here (no load in the IR) and placed in lane 0 only
    100   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask copies lane 0 into all four lanes (the splat)
    101   %shr = ashr <4 x i32> %val, %shamt ; assertions above expect a single movd (from %esi on x86-64, from memory on i686) then psrad, with no separate splat instruction
    102   store <4 x i32> %shr, <4 x i32>* %dst
    103   ret void
    104 }
    105