Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core2 -mattr=+sse2 | FileCheck %s
      3 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
      4 
      5 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
      6 ; single paddd instruction. At the moment we produce the sequence
      7 ; shufps+paddd+pshufd. This is fixed with the widening legalization.
      8 
      9 define double @test1(double %A) { ; integer add of a constant to a double viewed as <2 x i32>
     10 ; CHECK-LABEL: test1:
     11 ; CHECK:       # %bb.0:
     12 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
     13 ; CHECK-NEXT:    paddd {{.*}}(%rip), %xmm0
     14 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     15 ; CHECK-NEXT:    retq
     16 ;
     17 ; CHECK-WIDE-LABEL: test1:
     18 ; CHECK-WIDE:       # %bb.0:
     19 ; CHECK-WIDE-NEXT:    paddd {{.*}}(%rip), %xmm0
     20 ; CHECK-WIDE-NEXT:    retq
     21   %1 = bitcast double %A to <2 x i32> ; reinterpret the 64 bits of %A as two i32 lanes
     22   %add = add <2 x i32> %1, <i32 3, i32 5> ; lane-wise add of the constant vector <3, 5>
     23   %2 = bitcast <2 x i32> %add to double ; reinterpret the sum as a double for the return
     24   ret double %2
     25 }
     26 
     27 define double @test2(double %A, double %B) { ; integer add of two doubles, each viewed as <2 x i32>
     28 ; CHECK-LABEL: test2:
     29 ; CHECK:       # %bb.0:
     30 ; CHECK-NEXT:    paddd %xmm1, %xmm0
     31 ; CHECK-NEXT:    retq
     32 ;
     33 ; CHECK-WIDE-LABEL: test2:
     34 ; CHECK-WIDE:       # %bb.0:
     35 ; CHECK-WIDE-NEXT:    paddd %xmm1, %xmm0
     36 ; CHECK-WIDE-NEXT:    retq
     37   %1 = bitcast double %A to <2 x i32> ; first operand as two i32 lanes
     38   %2 = bitcast double %B to <2 x i32> ; second operand as two i32 lanes
     39   %add = add <2 x i32> %1, %2 ; lane-wise add of the two operands (no constant involved)
     40   %3 = bitcast <2 x i32> %add to double ; reinterpret the sum as a double for the return
     41   ret double %3
     42 }
     43 
     44 define i64 @test3(i64 %A) { ; floating-point add of a constant to an i64 viewed as <2 x float>
     45 ; CHECK-LABEL: test3:
     46 ; CHECK:       # %bb.0:
     47 ; CHECK-NEXT:    movq %rdi, %xmm0
     48 ; CHECK-NEXT:    addps {{.*}}(%rip), %xmm0
     49 ; CHECK-NEXT:    movq %xmm0, %rax
     50 ; CHECK-NEXT:    retq
     51 ;
     52 ; CHECK-WIDE-LABEL: test3:
     53 ; CHECK-WIDE:       # %bb.0:
     54 ; CHECK-WIDE-NEXT:    movq %rdi, %xmm0
     55 ; CHECK-WIDE-NEXT:    addps {{.*}}(%rip), %xmm0
     56 ; CHECK-WIDE-NEXT:    movq %xmm0, %rax
     57 ; CHECK-WIDE-NEXT:    retq
     58   %1 = bitcast i64 %A to <2 x float> ; reinterpret the 64-bit integer as two float lanes
     59   %add = fadd <2 x float> %1, <float 3.0, float 5.0> ; lane-wise fadd of the constant vector <3.0, 5.0>
     60   %2 = bitcast <2 x float> %add to i64 ; reinterpret the result as an i64 for the return
     61   ret i64 %2
     62 }
     63 
     64 ; FIXME: Ideally we should be able to fold the entire body of @test4 into a
     65 ; single paddd instruction. This is fixed with the widening legalization.
     66 
     67 define i64 @test4(i64 %A) { ; integer add of a constant to an i64 viewed as <2 x i32>
     68 ; CHECK-LABEL: test4:
     69 ; CHECK:       # %bb.0:
     70 ; CHECK-NEXT:    movq %rdi, %xmm0
     71 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
     72 ; CHECK-NEXT:    paddd {{.*}}(%rip), %xmm0
     73 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     74 ; CHECK-NEXT:    movq %xmm0, %rax
     75 ; CHECK-NEXT:    retq
     76 ;
     77 ; CHECK-WIDE-LABEL: test4:
     78 ; CHECK-WIDE:       # %bb.0:
     79 ; CHECK-WIDE-NEXT:    movq %rdi, %xmm0
     80 ; CHECK-WIDE-NEXT:    paddd {{.*}}(%rip), %xmm0
     81 ; CHECK-WIDE-NEXT:    movq %xmm0, %rax
     82 ; CHECK-WIDE-NEXT:    retq
     83   %1 = bitcast i64 %A to <2 x i32> ; reinterpret the 64-bit integer as two i32 lanes
     84   %add = add <2 x i32> %1, <i32 3, i32 5> ; lane-wise add of the constant vector <3, 5>
     85   %2 = bitcast <2 x i32> %add to i64 ; reinterpret the sum as an i64 for the return
     86   ret i64 %2
     87 }
     88 
     89 define double @test5(double %A) { ; floating-point add of a constant to a double viewed as <2 x float>
     90 ; CHECK-LABEL: test5:
     91 ; CHECK:       # %bb.0:
     92 ; CHECK-NEXT:    addps {{.*}}(%rip), %xmm0
     93 ; CHECK-NEXT:    retq
     94 ;
     95 ; CHECK-WIDE-LABEL: test5:
     96 ; CHECK-WIDE:       # %bb.0:
     97 ; CHECK-WIDE-NEXT:    addps {{.*}}(%rip), %xmm0
     98 ; CHECK-WIDE-NEXT:    retq
     99   %1 = bitcast double %A to <2 x float> ; reinterpret the 64 bits of %A as two float lanes
    100   %add = fadd <2 x float> %1, <float 3.0, float 5.0> ; lane-wise fadd of the constant vector <3.0, 5.0>
    101   %2 = bitcast <2 x float> %add to double ; reinterpret the result as a double for the return
    102   ret double %2
    103 }
    104 
    105 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
    106 ; single paddw instruction. This is fixed with the widening legalization.
    107 
    108 define double @test6(double %A) { ; integer add of a constant to a double viewed as <4 x i16>
    109 ; CHECK-LABEL: test6:
    110 ; CHECK:       # %bb.0:
    111 ; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    112 ; CHECK-NEXT:    paddw {{.*}}(%rip), %xmm0
    113 ; CHECK-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    114 ; CHECK-NEXT:    retq
    115 ;
    116 ; CHECK-WIDE-LABEL: test6:
    117 ; CHECK-WIDE:       # %bb.0:
    118 ; CHECK-WIDE-NEXT:    paddw {{.*}}(%rip), %xmm0
    119 ; CHECK-WIDE-NEXT:    retq
    120   %1 = bitcast double %A to <4 x i16> ; reinterpret the 64 bits of %A as four i16 lanes
    121   %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6> ; lane-wise add of the constant vector <3, 4, 5, 6>
    122   %2 = bitcast <4 x i16> %add to double ; reinterpret the sum as a double for the return
    123   ret double %2
    124 }
    125 
    126 define double @test7(double %A, double %B) { ; integer add of two doubles, each viewed as <4 x i16>
    127 ; CHECK-LABEL: test7:
    128 ; CHECK:       # %bb.0:
    129 ; CHECK-NEXT:    paddw %xmm1, %xmm0
    130 ; CHECK-NEXT:    retq
    131 ;
    132 ; CHECK-WIDE-LABEL: test7:
    133 ; CHECK-WIDE:       # %bb.0:
    134 ; CHECK-WIDE-NEXT:    paddw %xmm1, %xmm0
    135 ; CHECK-WIDE-NEXT:    retq
    136   %1 = bitcast double %A to <4 x i16> ; first operand as four i16 lanes
    137   %2 = bitcast double %B to <4 x i16> ; second operand as four i16 lanes
    138   %add = add <4 x i16> %1, %2 ; lane-wise add of the two operands (no constant involved)
    139   %3 = bitcast <4 x i16> %add to double ; reinterpret the sum as a double for the return
    140   ret double %3
    141 }
    142 
    143 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
    144 ; single paddb instruction. At the moment we produce the sequence
    145 ; punpcklbw+paddb+pshufb. This is fixed with the widening legalization.
    146 
    147 define double @test8(double %A) { ; integer add of a constant to a double viewed as <8 x i8>
    148 ; CHECK-LABEL: test8:
    149 ; CHECK:       # %bb.0:
    150 ; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    151 ; CHECK-NEXT:    paddb {{.*}}(%rip), %xmm0
    152 ; CHECK-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
    153 ; CHECK-NEXT:    retq
    154 ;
    155 ; CHECK-WIDE-LABEL: test8:
    156 ; CHECK-WIDE:       # %bb.0:
    157 ; CHECK-WIDE-NEXT:    paddb {{.*}}(%rip), %xmm0
    158 ; CHECK-WIDE-NEXT:    retq
    159   %1 = bitcast double %A to <8 x i8> ; reinterpret the 64 bits of %A as eight i8 lanes
    160   %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10> ; lane-wise add of the constants 3 through 10
    161   %2 = bitcast <8 x i8> %add to double ; reinterpret the sum as a double for the return
    162   ret double %2
    163 }
    164 
    165 define double @test9(double %A, double %B) { ; integer add of two doubles, each viewed as <8 x i8>
    166 ; CHECK-LABEL: test9:
    167 ; CHECK:       # %bb.0:
    168 ; CHECK-NEXT:    paddb %xmm1, %xmm0
    169 ; CHECK-NEXT:    retq
    170 ;
    171 ; CHECK-WIDE-LABEL: test9:
    172 ; CHECK-WIDE:       # %bb.0:
    173 ; CHECK-WIDE-NEXT:    paddb %xmm1, %xmm0
    174 ; CHECK-WIDE-NEXT:    retq
    175   %1 = bitcast double %A to <8 x i8> ; first operand as eight i8 lanes
    176   %2 = bitcast double %B to <8 x i8> ; second operand as eight i8 lanes
    177   %add = add <8 x i8> %1, %2 ; lane-wise add of the two operands (no constant involved)
    178   %3 = bitcast <8 x i8> %add to double ; reinterpret the sum as a double for the return
    179   ret double %3
    180 }
    181