Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSE -check-prefix=CHECK
      2 ; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX -check-prefix=CHECK
      3 
      4 ; Test ADDSUB ISel patterns.
      5 
      6 ; Functions below are obtained from the following source:
      7 ;
      8 ; typedef double double2 __attribute__((ext_vector_type(2)));
      9 ; typedef double double4 __attribute__((ext_vector_type(4)));
     10 ; typedef float float4 __attribute__((ext_vector_type(4)));
     11 ; typedef float float8 __attribute__((ext_vector_type(8)));
     12 ;
     13 ; float4 test1(float4 A, float4 B) {
     14 ;   float4 X = A - B;
     15 ;   float4 Y = A + B;
     16 ;   return (float4){X[0], Y[1], X[2], Y[3]};
     17 ; }
     18 ;
     19 ; float8 test2(float8 A, float8 B) {
     20 ;   float8 X = A - B;
     21 ;   float8 Y = A + B;
     22 ;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
     23 ; }
     24 ;
     25 ; double4 test3(double4 A, double4 B) {
     26 ;   double4 X = A - B;
     27 ;   double4 Y = A + B;
     28 ;   return (double4){X[0], Y[1], X[2], Y[3]};
     29 ; }
     30 ;
     31 ; double2 test4(double2 A, double2 B) {
     32 ;   double2 X = A - B;
     33 ;   double2 Y = A + B;
     34 ;   return (double2){X[0], Y[1]};
     35 ; }
     36 
      37 define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {  ; result: even lanes A-B, odd lanes A+B
      38   %sub = fsub <4 x float> %A, %B
      39   %add = fadd <4 x float> %A, %B
      40   %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>  ; mask 0,2 take %sub lanes 0,2; mask 5,7 take %add lanes 1,3
      41   ret <4 x float> %vecinit6
      42 }  ; directives below expect one addsubps (SSE run) or vaddsubps (AVX run) with ret on the next line
      43 ; CHECK-LABEL: test1
      44 ; SSE: addsubps
      45 ; AVX: vaddsubps
      46 ; CHECK-NEXT: ret
     47 
     48 
      49 define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {  ; 8-lane variant: even lanes A-B, odd lanes A+B
      50   %sub = fsub <8 x float> %A, %B
      51   %add = fadd <8 x float> %A, %B
      52   %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>  ; even result lanes from %sub; mask indices >= 8 select the odd lanes of %add
      53   ret <8 x float> %vecinit14
      54 }  ; SSE run expects two adjacent addsubps; AVX run expects exactly one vaddsubps before ret
      55 ; CHECK-LABEL: test2
      56 ; SSE: addsubps
      57 ; SSE-NEXT: addsubps
      58 ; AVX: vaddsubps
      59 ; AVX-NOT: vaddsubps
      60 ; CHECK: ret
     61 
     62 
      63 define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {  ; 4 x double: even lanes A-B, odd lanes A+B
      64   %sub = fsub <4 x double> %A, %B
      65   %add = fadd <4 x double> %A, %B
      66   %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>  ; mask 0,2 take %sub lanes 0,2; mask 5,7 take %add lanes 1,3
      67   ret <4 x double> %vecinit6
      68 }  ; SSE run expects two addsubpd (not required to be adjacent); AVX run expects exactly one vaddsubpd before ret
      69 ; CHECK-LABEL: test3
      70 ; SSE: addsubpd
      71 ; SSE: addsubpd
      72 ; AVX: vaddsubpd
      73 ; AVX-NOT: vaddsubpd
      74 ; CHECK: ret
     75 
     76 
      77 define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {  ; lane 0 = A-B, lane 1 = A+B. NOTE(review): attribute group #0 is not defined in the visible part of this file - confirm
      78   %add = fadd <2 x double> %A, %B
      79   %sub = fsub <2 x double> %A, %B
      80   %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>  ; mask 0 takes %sub lane 0; mask 3 takes %add lane 1
      81   ret <2 x double> %vecinit2
      82 }  ; directives below expect one addsubpd (SSE run) or vaddsubpd (AVX run) with ret on the next line
      83 ; CHECK-LABEL: test4
      84 ; SSE: addsubpd
      85 ; AVX: vaddsubpd
      86 ; CHECK-NEXT: ret
     87 
     88 
      89 define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {  ; like test1, but the second operand is loaded through pointer %B
      90   %1 = load <4 x float>* %B
      91   %add = fadd <4 x float> %A, %1
      92   %sub = fsub <4 x float> %A, %1
      93   %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>  ; even lanes from %sub, odd lanes from %add
      94   ret <4 x float> %vecinit6
      95 }  ; directives match only the mnemonic; presumably the load is meant to fold into a memory operand - TODO confirm
      96 ; CHECK-LABEL: test1b
      97 ; SSE: addsubps
      98 ; AVX: vaddsubps
      99 ; CHECK-NEXT: ret
    100 
    101 
     102 define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {  ; like test2, but the second operand is loaded through pointer %B
     103   %1 = load <8 x float>* %B
     104   %add = fadd <8 x float> %A, %1
     105   %sub = fsub <8 x float> %A, %1
     106   %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>  ; even lanes from %sub, odd lanes from %add
     107   ret <8 x float> %vecinit14
     108 }  ; SSE run expects two adjacent addsubps; AVX run expects exactly one vaddsubps before ret
     109 ; CHECK-LABEL: test2b
     110 ; SSE: addsubps
     111 ; SSE-NEXT: addsubps
     112 ; AVX: vaddsubps
     113 ; AVX-NOT: vaddsubps
     114 ; CHECK: ret
    115 
    116 
     117 define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {  ; like test3, but the second operand is loaded through pointer %B
     118   %1 = load <4 x double>* %B
     119   %add = fadd <4 x double> %A, %1
     120   %sub = fsub <4 x double> %A, %1
     121   %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>  ; even lanes from %sub, odd lanes from %add
     122   ret <4 x double> %vecinit6
     123 }  ; SSE run expects two addsubpd; AVX run expects exactly one vaddsubpd before ret
     124 ; CHECK-LABEL: test3b
     125 ; SSE: addsubpd
     126 ; SSE: addsubpd
     127 ; AVX: vaddsubpd
     128 ; AVX-NOT: vaddsubpd
     129 ; CHECK: ret
    130 
    131 
     132 define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {  ; like test4, but the second operand is loaded through pointer %B
     133   %1 = load <2 x double>* %B
     134   %sub = fsub <2 x double> %A, %1
     135   %add = fadd <2 x double> %A, %1
     136   %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>  ; lane 0 from %sub, lane 1 from %add
     137   ret <2 x double> %vecinit2
     138 }  ; directives below expect one addsubpd (SSE run) or vaddsubpd (AVX run) with ret on the next line
     139 ; CHECK-LABEL: test4b
     140 ; SSE: addsubpd
     141 ; AVX: vaddsubpd
     142 ; CHECK-NEXT: ret
    143 
    144 ; Functions below are obtained from the following source:
    145 ;
     146 ; float4 test5(float4 A, float4 B) {
     147 ;   float4 X = A + B;
     148 ;   float4 Y = A - B;
     149 ;   return (float4){X[0], Y[1], X[2], Y[3]};
     150 ; }
     151 ;
     152 ; float8 test6(float8 A, float8 B) {
     153 ;   float8 X = A + B;
     154 ;   float8 Y = A - B;
     155 ;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
     156 ; }
     157 ;
     158 ; double4 test7(double4 A, double4 B) {
     159 ;   double4 X = A + B;
     160 ;   double4 Y = A - B;
     161 ;   return (double4){X[0], Y[1], X[2], Y[3]};
     162 ; }
     163 ;
     164 ; double2 test8(double2 A, double2 B) {
     165 ;   double2 X = A + B;
     166 ;   double2 Y = A - B;
     167 ;   return (double2){X[0], Y[1]};
    168 ; }
    169 
     170 define <4 x float> @test5(<4 x float> %A, <4 x float> %B) {  ; reversed blend: even lanes A+B, odd lanes A-B
     171   %sub = fsub <4 x float> %A, %B
     172   %add = fadd <4 x float> %A, %B
     173   %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>  ; mask 0,2 take %add lanes 0,2; mask 5,7 take %sub lanes 1,3
     174   ret <4 x float> %vecinit6
     175 }  ; expects xorps directly followed by addsubps; presumably the xor negates one operand - TODO confirm
     176 ; CHECK-LABEL: test5
     177 ; SSE: xorps
     178 ; SSE-NEXT: addsubps
     179 ; AVX: vxorps
     180 ; AVX-NEXT: vaddsubps
     181 ; CHECK: ret
    182 
    183 
     184 define <8 x float> @test6(<8 x float> %A, <8 x float> %B) {  ; 8-lane reversed blend: even lanes A+B, odd lanes A-B
     185   %sub = fsub <8 x float> %A, %B
     186   %add = fadd <8 x float> %A, %B
     187   %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>  ; even result lanes from %add; mask indices >= 8 select the odd lanes of %sub
     188   ret <8 x float> %vecinit14
     189 }  ; SSE run expects two xorps+addsubps pairs; AVX run expects a single vxorps+vaddsubps pair before ret
     190 ; CHECK-LABEL: test6
     191 ; SSE: xorps
     192 ; SSE-NEXT: addsubps
     193 ; SSE: xorps
     194 ; SSE-NEXT: addsubps
     195 ; AVX: vxorps
     196 ; AVX-NEXT: vaddsubps
     197 ; AVX-NOT: vxorps
     198 ; AVX-NOT: vaddsubps
     199 ; CHECK: ret
    200 
    201 
     202 define <4 x double> @test7(<4 x double> %A, <4 x double> %B) {  ; 4 x double reversed blend: even lanes A+B, odd lanes A-B
     203   %sub = fsub <4 x double> %A, %B
     204   %add = fadd <4 x double> %A, %B
     205   %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>  ; mask 0,2 take %add lanes 0,2; mask 5,7 take %sub lanes 1,3
     206   ret <4 x double> %vecinit6
     207 }  ; SSE run expects two xorpd+addsubpd pairs; AVX run expects a single vxorpd+vaddsubpd pair before ret
     208 ; CHECK-LABEL: test7
     209 ; SSE: xorpd
     210 ; SSE-NEXT: addsubpd
     211 ; SSE: xorpd
     212 ; SSE-NEXT: addsubpd
     213 ; AVX: vxorpd
     214 ; AVX-NEXT: vaddsubpd
     215 ; AVX-NOT: vxorpd
     216 ; AVX-NOT: vaddsubpd
     217 ; CHECK: ret
    218 
    219 
     220 define <2 x double> @test8(<2 x double> %A, <2 x double> %B) #0 {  ; lane 0 = A+B, lane 1 = A-B. NOTE(review): attribute group #0 is not defined in the visible part of this file - confirm
     221   %add = fadd <2 x double> %A, %B
     222   %sub = fsub <2 x double> %A, %B
     223   %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 0, i32 3>  ; mask 0 takes %add lane 0; mask 3 takes %sub lane 1
     224   ret <2 x double> %vecinit2
     225 }  ; expects xorpd directly followed by addsubpd (v-prefixed in the AVX run), then ret
     226 ; CHECK-LABEL: test8
     227 ; SSE: xorpd
     228 ; SSE-NEXT: addsubpd
     229 ; AVX: vxorpd
     230 ; AVX-NEXT: vaddsubpd
     231 ; CHECK: ret
    232 
    233 
    234 define <4 x float> @test5b(<4 x float> %A, <4 x float> %B) {
    235   %sub = fsub <4 x float> %A, %B
    236   %add = fadd <4 x float> %B, %A
    237   %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    238   ret <4 x float> %vecinit6
    239 }
    240 ; CHECK-LABEL: test5
    241 ; SSE: xorps
    242 ; SSE-NEXT: addsubps
    243 ; AVX: vxorps
    244 ; AVX-NEXT: vaddsubps
    245 ; CHECK: ret
    246 
    247 
    248 define <8 x float> @test6b(<8 x float> %A, <8 x float> %B) {
    249   %sub = fsub <8 x float> %A, %B
    250   %add = fadd <8 x float> %B, %A
    251   %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
    252   ret <8 x float> %vecinit14
    253 }
    254 ; CHECK-LABEL: test6
    255 ; SSE: xorps
    256 ; SSE-NEXT: addsubps
    257 ; SSE: xorps
    258 ; SSE-NEXT: addsubps
    259 ; AVX: vxorps
    260 ; AVX-NEXT: vaddsubps
    261 ; AVX-NOT: vxorps
    262 ; AVX-NOT: vaddsubps
    263 ; CHECK: ret
    264 
    265 
    266 define <4 x double> @test7b(<4 x double> %A, <4 x double> %B) {
    267   %sub = fsub <4 x double> %A, %B
    268   %add = fadd <4 x double> %B, %A
    269   %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    270   ret <4 x double> %vecinit6
    271 }
    272 ; CHECK-LABEL: test7
    273 ; SSE: xorpd
    274 ; SSE-NEXT: addsubpd
    275 ; SSE: xorpd
    276 ; SSE-NEXT: addsubpd
    277 ; AVX: vxorpd
    278 ; AVX-NEXT: vaddsubpd
    279 ; AVX-NOT: vxorpd
    280 ; AVX-NOT: vaddsubpd
    281 ; CHECK: ret
    282 
    283 
    284 define <2 x double> @test8b(<2 x double> %A, <2 x double> %B) #0 {
    285   %add = fadd <2 x double> %B, %A
    286   %sub = fsub <2 x double> %A, %B
    287   %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 0, i32 3>
    288   ret <2 x double> %vecinit2
    289 }
    290 ; CHECK-LABEL: test8
    291 ; SSE: xorpd
    292 ; SSE-NEXT: addsubpd
    293 ; AVX: vxorpd
    294 ; AVX-NEXT: vaddsubpd
    295 ; CHECK: ret
    296 
    297