Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
      2 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
      3 
      4 
      5 define double @test1(double %A) {  ; adds a constant <2 x i32> to the bits of a double and returns the result as a double
      6   %1 = bitcast double %A to <2 x i32>  ; reinterpret the bits of %A as two i32 lanes
      7   %add = add <2 x i32> %1, <i32 3, i32 5>  ; lane-wise integer add of the constants 3 and 5
      8   %2 = bitcast <2 x i32> %add to double  ; reinterpret the two lanes as a double again
      9   ret double %2
     10 }
     11 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
     12 ; single paddd instruction. At the moment we produce the sequence
     13 ; pshufd+paddd+pshufd. This is fixed with the widening legalization.
     14 ;
     15 ; CHECK-LABEL: test1
     16 ; CHECK-NOT: movsd
     17 ; CHECK: pshufd
     18 ; CHECK-NEXT: paddd
     19 ; CHECK-NEXT: pshufd
     20 ; CHECK-NEXT: ret
     21 ;
     22 ; CHECK-WIDE-LABEL: test1
     23 ; CHECK-WIDE-NOT: movsd
     24 ; CHECK-WIDE: paddd
     25 ; CHECK-WIDE-NEXT: ret
     26 
     27 
     28 define double @test2(double %A, double %B) {  ; adds two doubles as <2 x i32> vectors and returns the result as a double
     29   %1 = bitcast double %A to <2 x i32>  ; reinterpret the bits of %A as two i32 lanes
     30   %2 = bitcast double %B to <2 x i32>  ; reinterpret the bits of %B as two i32 lanes
     31   %add = add <2 x i32> %1, %2  ; lane-wise integer add of the two arguments
     32   %3 = bitcast <2 x i32> %add to double  ; reinterpret the two lanes as a double again
     33   ret double %3
     34 }
     35 ; CHECK-LABEL: test2
     36 ; CHECK-NOT: movsd
     37 ; CHECK: paddd
     38 ; CHECK-NEXT: ret
     39 ;
     40 ; CHECK-WIDE-LABEL: test2
     41 ; CHECK-WIDE-NOT: movsd
     42 ; CHECK-WIDE: paddd
     43 ; CHECK-WIDE-NEXT: ret
     44 
     45 
     46 define i64 @test3(i64 %A) {  ; adds a constant <2 x float> to the bits of an i64 and returns the result as an i64
     47   %1 = bitcast i64 %A to <2 x float>  ; reinterpret the bits of %A as two float lanes
     48   %add = fadd <2 x float> %1, <float 3.0, float 5.0>  ; lane-wise floating-point add of the constants 3.0 and 5.0
     49   %2 = bitcast <2 x float> %add to i64  ; reinterpret the two lanes as an i64 again
     50   ret i64 %2
     51 }
     52 ; CHECK-LABEL: test3
     53 ; CHECK-NOT: pshufd
     54 ; CHECK: addps
     55 ; CHECK-NOT: pshufd
     56 ; CHECK: ret
     57 ;
     58 ; CHECK-WIDE-LABEL: test3
     59 ; CHECK-WIDE-NOT: pshufd
     60 ; CHECK-WIDE: addps
     61 ; CHECK-WIDE-NOT: pshufd
     62 ; CHECK-WIDE: ret
     63 
     64 
     65 define i64 @test4(i64 %A) {  ; adds a constant <2 x i32> to the bits of an i64 and returns the result as an i64
     66   %1 = bitcast i64 %A to <2 x i32>  ; reinterpret the bits of %A as two i32 lanes
     67   %add = add <2 x i32> %1, <i32 3, i32 5>  ; lane-wise integer add of the constants 3 and 5
     68   %2 = bitcast <2 x i32> %add to i64  ; reinterpret the two lanes as an i64 again
     69   ret i64 %2
     70 }
     71 ; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
     72 ; Ideally, we should fold that sequence into a single paddd. This is fixed with
     73 ; the widening legalization.
     74 ;
     75 ; CHECK-LABEL: test4
     76 ; CHECK: pshufd
     77 ; CHECK-NEXT: paddd
     78 ; CHECK-NEXT: pshufd
     79 ; CHECK: ret
     80 ;
     81 ; CHECK-WIDE-LABEL: test4
     82 ; CHECK-WIDE: movd %{{rdi|rcx}},
     83 ; CHECK-WIDE-NEXT: paddd
     84 ; CHECK-WIDE-NEXT: movd {{.*}}, %rax
     85 ; CHECK-WIDE: ret
     86 
     87 
     88 define double @test5(double %A) {  ; adds a constant <2 x float> to the bits of a double and returns the result as a double
     89   %1 = bitcast double %A to <2 x float>  ; reinterpret the bits of %A as two float lanes
     90   %add = fadd <2 x float> %1, <float 3.0, float 5.0>  ; lane-wise floating-point add of the constants 3.0 and 5.0
     91   %2 = bitcast <2 x float> %add to double  ; reinterpret the two lanes as a double again
     92   ret double %2
     93 }
     94 ; CHECK-LABEL: test5
     95 ; CHECK: addps
     96 ; CHECK-NEXT: ret
     97 ;
     98 ; CHECK-WIDE-LABEL: test5
     99 ; CHECK-WIDE: addps
    100 ; CHECK-WIDE-NEXT: ret
    101 
    102 
    103 define double @test6(double %A) {  ; adds a constant <4 x i16> to the bits of a double and returns the result as a double
    104   %1 = bitcast double %A to <4 x i16>  ; reinterpret the bits of %A as four i16 lanes
    105   %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>  ; lane-wise integer add of the constants 3, 4, 5 and 6
    106   %2 = bitcast <4 x i16> %add to double  ; reinterpret the four lanes as a double again
    107   ret double %2
    108 }
    109 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
    110 ; single paddw instruction. This is fixed with the widening legalization.
    111 ;
    112 ; CHECK-LABEL: test6
    113 ; CHECK-NOT: movsd
    114 ; CHECK: punpcklwd
    115 ; CHECK-NEXT: paddw
    116 ; CHECK-NEXT: pshufb
    117 ; CHECK-NEXT: ret
    118 ;
    119 ; CHECK-WIDE-LABEL: test6
    120 ; CHECK-WIDE-NOT: mov
    121 ; CHECK-WIDE-NOT: punpcklwd
    122 ; CHECK-WIDE: paddw
    123 ; CHECK-WIDE-NEXT: ret
    124 
    125 
    126 define double @test7(double %A, double %B) {  ; adds two doubles as <4 x i16> vectors and returns the result as a double
    127   %1 = bitcast double %A to <4 x i16>  ; reinterpret the bits of %A as four i16 lanes
    128   %2 = bitcast double %B to <4 x i16>  ; reinterpret the bits of %B as four i16 lanes
    129   %add = add <4 x i16> %1, %2  ; lane-wise integer add of the two arguments
    130   %3 = bitcast <4 x i16> %add to double  ; reinterpret the four lanes as a double again
    131   ret double %3
    132 }
    133 ; CHECK-LABEL: test7
    134 ; CHECK-NOT: movsd
    135 ; CHECK-NOT: punpcklwd
    136 ; CHECK: paddw
    137 ; CHECK-NEXT: ret
    138 ;
    139 ; CHECK-WIDE-LABEL: test7
    140 ; CHECK-WIDE-NOT: movsd
    141 ; CHECK-WIDE-NOT: punpcklwd
    142 ; CHECK-WIDE: paddw
    143 ; CHECK-WIDE-NEXT: ret
    144 
    145 
    146 define double @test8(double %A) {  ; adds a constant <8 x i8> to the bits of a double and returns the result as a double
    147   %1 = bitcast double %A to <8 x i8>  ; reinterpret the bits of %A as eight i8 lanes
    148   %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>  ; lane-wise integer add of the constants 3 through 10
    149   %2 = bitcast <8 x i8> %add to double  ; reinterpret the eight lanes as a double again
    150   ret double %2
    151 }
    152 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
    153 ; single paddb instruction. At the moment we produce the sequence
    154 ; punpcklbw+paddb+pshufb. This is fixed with the widening legalization.
    155 ;
    156 ; CHECK-LABEL: test8
    157 ; CHECK-NOT: movsd
    158 ; CHECK: punpcklbw
    159 ; CHECK-NEXT: paddb
    160 ; CHECK-NEXT: pshufb
    161 ; CHECK-NEXT: ret
    162 ;
    163 ; CHECK-WIDE-LABEL: test8
    164 ; CHECK-WIDE-NOT: movsd
    165 ; CHECK-WIDE-NOT: punpcklbw
    166 ; CHECK-WIDE: paddb
    167 ; CHECK-WIDE-NEXT: ret
    168 
    169 
    170 define double @test9(double %A, double %B) {  ; adds two doubles as <8 x i8> vectors and returns the result as a double
    171   %1 = bitcast double %A to <8 x i8>  ; reinterpret the bits of %A as eight i8 lanes
    172   %2 = bitcast double %B to <8 x i8>  ; reinterpret the bits of %B as eight i8 lanes
    173   %add = add <8 x i8> %1, %2  ; lane-wise integer add of the two arguments
    174   %3 = bitcast <8 x i8> %add to double  ; reinterpret the eight lanes as a double again
    175   ret double %3
    176 }
    177 ; CHECK-LABEL: test9
    178 ; CHECK-NOT: movsd
    179 ; CHECK-NOT: punpcklbw
    180 ; CHECK: paddb
    181 ; CHECK-NEXT: ret
    182 ;
    183 ; CHECK-WIDE-LABEL: test9
    184 ; CHECK-WIDE-NOT: movsd
    185 ; CHECK-WIDE-NOT: punpcklbw
    186 ; CHECK-WIDE: paddb
    187 ; CHECK-WIDE-NEXT: ret
    188 
    189