; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64

; Test that vector shifts are converted to proper SSE2 vector shift
; instructions when all lanes use the same shift amount.

; Note that x86 does have ashr
     10 define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
     11 ; X32-LABEL: shift1a:
     12 ; X32:       # %bb.0: # %entry
     13 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     14 ; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
     15 ; X32-NEXT:    psrad $31, %xmm0
     16 ; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
     17 ; X32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
     18 ; X32-NEXT:    movdqa %xmm1, (%eax)
     19 ; X32-NEXT:    retl
     20 ;
     21 ; X64-LABEL: shift1a:
     22 ; X64:       # %bb.0: # %entry
     23 ; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
     24 ; X64-NEXT:    psrad $31, %xmm0
     25 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
     26 ; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
     27 ; X64-NEXT:    movdqa %xmm1, (%rdi)
     28 ; X64-NEXT:    retq
     29 entry:
     30   %ashr = ashr <2 x i64> %val, < i64 32, i64 32 >
     31   store <2 x i64> %ashr, <2 x i64>* %dst
     32   ret void
     33 }
     34 
     35 define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
     36 ; X32-LABEL: shift2a:
     37 ; X32:       # %bb.0: # %entry
     38 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     39 ; X32-NEXT:    psrad $5, %xmm0
     40 ; X32-NEXT:    movdqa %xmm0, (%eax)
     41 ; X32-NEXT:    retl
     42 ;
     43 ; X64-LABEL: shift2a:
     44 ; X64:       # %bb.0: # %entry
     45 ; X64-NEXT:    psrad $5, %xmm0
     46 ; X64-NEXT:    movdqa %xmm0, (%rdi)
     47 ; X64-NEXT:    retq
     48 entry:
     49   %ashr = ashr <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
     50   store <4 x i32> %ashr, <4 x i32>* %dst
     51   ret void
     52 }
     53 
     54 define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
     55 ; X32-LABEL: shift2b:
     56 ; X32:       # %bb.0: # %entry
     57 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     58 ; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
     59 ; X32-NEXT:    psrad %xmm1, %xmm0
     60 ; X32-NEXT:    movdqa %xmm0, (%eax)
     61 ; X32-NEXT:    retl
     62 ;
     63 ; X64-LABEL: shift2b:
     64 ; X64:       # %bb.0: # %entry
     65 ; X64-NEXT:    movd %esi, %xmm1
     66 ; X64-NEXT:    psrad %xmm1, %xmm0
     67 ; X64-NEXT:    movdqa %xmm0, (%rdi)
     68 ; X64-NEXT:    retq
     69 entry:
     70   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
     71   %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
     72   %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
     73   %3 = insertelement <4 x i32> %2, i32 %amt, i32 3
     74   %ashr = ashr <4 x i32> %val, %3
     75   store <4 x i32> %ashr, <4 x i32>* %dst
     76   ret void
     77 }
     78 
     79 define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
     80 ; X32-LABEL: shift3a:
     81 ; X32:       # %bb.0: # %entry
     82 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     83 ; X32-NEXT:    psraw $5, %xmm0
     84 ; X32-NEXT:    movdqa %xmm0, (%eax)
     85 ; X32-NEXT:    retl
     86 ;
     87 ; X64-LABEL: shift3a:
     88 ; X64:       # %bb.0: # %entry
     89 ; X64-NEXT:    psraw $5, %xmm0
     90 ; X64-NEXT:    movdqa %xmm0, (%rdi)
     91 ; X64-NEXT:    retq
     92 entry:
     93   %ashr = ashr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
     94   store <8 x i16> %ashr, <8 x i16>* %dst
     95   ret void
     96 }
     97 
     98 define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
     99 ; X32-LABEL: shift3b:
    100 ; X32:       # %bb.0: # %entry
    101 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    102 ; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
    103 ; X32-NEXT:    movd %ecx, %xmm1
    104 ; X32-NEXT:    psraw %xmm1, %xmm0
    105 ; X32-NEXT:    movdqa %xmm0, (%eax)
    106 ; X32-NEXT:    retl
    107 ;
    108 ; X64-LABEL: shift3b:
    109 ; X64:       # %bb.0: # %entry
    110 ; X64-NEXT:    movzwl %si, %eax
    111 ; X64-NEXT:    movd %eax, %xmm1
    112 ; X64-NEXT:    psraw %xmm1, %xmm0
    113 ; X64-NEXT:    movdqa %xmm0, (%rdi)
    114 ; X64-NEXT:    retq
    115 entry:
    116   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
    117   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
    118   %2 = insertelement <8 x i16> %1, i16 %amt, i32 2
    119   %3 = insertelement <8 x i16> %2, i16 %amt, i32 3
    120   %4 = insertelement <8 x i16> %3, i16 %amt, i32 4
    121   %5 = insertelement <8 x i16> %4, i16 %amt, i32 5
    122   %6 = insertelement <8 x i16> %5, i16 %amt, i32 6
    123   %7 = insertelement <8 x i16> %6, i16 %amt, i32 7
    124   %ashr = ashr <8 x i16> %val, %7
    125   store <8 x i16> %ashr, <8 x i16>* %dst
    126   ret void
    127 }
    128