Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
      5 
      6 ; Test ADDSUB ISel patterns.
      7 
      8 ; Functions test1 through test4 below are obtained from the following source:
      9 ;
     10 ; typedef double double2 __attribute__((ext_vector_type(2)));
     11 ; typedef double double4 __attribute__((ext_vector_type(4)));
     12 ; typedef float float4 __attribute__((ext_vector_type(4)));
     13 ; typedef float float8 __attribute__((ext_vector_type(8)));
     14 ;
     15 ; float4 test1(float4 A, float4 B) {
     16 ;   float4 X = A - B;
     17 ;   float4 Y = A + B;
     18 ;   return (float4){X[0], Y[1], X[2], Y[3]};
     19 ; }
     20 ;
     21 ; float8 test2(float8 A, float8 B) {
     22 ;   float8 X = A - B;
     23 ;   float8 Y = A + B;
     24 ;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
     25 ; }
     26 ;
     27 ; double4 test3(double4 A, double4 B) {
     28 ;   double4 X = A - B;
     29 ;   double4 Y = A + B;
     30 ;   return (double4){X[0], Y[1], X[2], Y[3]};
     31 ; }
     32 ;
     33 ; double2 test4(double2 A, double2 B) {
     34 ;   double2 X = A - B;
     35 ;   double2 Y = A + B;
     36 ;   return (double2){X[0], Y[1]};
     37 ; }
     38 
     39 define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
     40 ; SSE-LABEL: test1:
     41 ; SSE:       # BB#0:
     42 ; SSE-NEXT:    addsubps %xmm1, %xmm0
     43 ; SSE-NEXT:    retq
     44 ;
     45 ; AVX-LABEL: test1:
     46 ; AVX:       # BB#0:
     47 ; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
     48 ; AVX-NEXT:    retq
        ; 128-bit float case with both operands in registers.
        ; The shuffle mask <0,5,2,7> takes lanes 0 and 2 from %sub (A - B) and
        ; lanes 1 and 3 from %add (A + B); the checks above expect the whole
        ; sequence to be selected as a single (v)addsubps.
     49   %sub = fsub <4 x float> %A, %B
     50   %add = fadd <4 x float> %A, %B
     51   %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
     52   ret <4 x float> %vecinit6
     53 }
     54 
     55 define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
     56 ; SSE-LABEL: test2:
     57 ; SSE:       # BB#0:
     58 ; SSE-NEXT:    addsubps %xmm2, %xmm0
     59 ; SSE-NEXT:    addsubps %xmm3, %xmm1
     60 ; SSE-NEXT:    retq
     61 ;
     62 ; AVX-LABEL: test2:
     63 ; AVX:       # BB#0:
     64 ; AVX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
     65 ; AVX-NEXT:    retq
        ; 256-bit float case with both operands in registers.
        ; Even lanes are taken from %sub (A - B) and odd lanes from %add (A + B).
        ; The checks above expect two 128-bit addsubps on the SSE3 run and a
        ; single ymm vaddsubps on the AVX runs.
     66   %sub = fsub <8 x float> %A, %B
     67   %add = fadd <8 x float> %A, %B
     68   %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
     69   ret <8 x float> %vecinit14
     70 }
     71 
     72 define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
     73 ; SSE-LABEL: test3:
     74 ; SSE:       # BB#0:
     75 ; SSE-NEXT:    addsubpd %xmm2, %xmm0
     76 ; SSE-NEXT:    addsubpd %xmm3, %xmm1
     77 ; SSE-NEXT:    retq
     78 ;
     79 ; AVX-LABEL: test3:
     80 ; AVX:       # BB#0:
     81 ; AVX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
     82 ; AVX-NEXT:    retq
        ; 256-bit double case with both operands in registers.
        ; The shuffle mask <0,5,2,7> takes lanes 0 and 2 from %sub (A - B) and
        ; lanes 1 and 3 from %add (A + B). The checks above expect two 128-bit
        ; addsubpd on the SSE3 run and a single ymm vaddsubpd on the AVX runs.
     83   %sub = fsub <4 x double> %A, %B
     84   %add = fadd <4 x double> %A, %B
     85   %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
     86   ret <4 x double> %vecinit6
     87 }
     88 
     89 define <2 x double> @test4(<2 x double> %A, <2 x double> %B) {
     90 ; SSE-LABEL: test4:
     91 ; SSE:       # BB#0:
     92 ; SSE-NEXT:    addsubpd %xmm1, %xmm0
     93 ; SSE-NEXT:    retq
     94 ;
     95 ; AVX-LABEL: test4:
     96 ; AVX:       # BB#0:
     97 ; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
     98 ; AVX-NEXT:    retq
        ; 128-bit double case with both operands in registers.
        ; The shuffle mask <0,3> takes lane 0 from %sub (A - B) and lane 1 from
        ; %add (A + B); the checks above expect a single (v)addsubpd.
        ; Unlike test1 through test3, the fadd is emitted before the fsub here.
     99   %add = fadd <2 x double> %A, %B
    100   %sub = fsub <2 x double> %A, %B
    101   %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
    102   ret <2 x double> %vecinit2
    103 }
    104 
    105 define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
    106 ; SSE-LABEL: test5:
    107 ; SSE:       # BB#0:
    108 ; SSE-NEXT:    addsubps %xmm4, %xmm0
    109 ; SSE-NEXT:    addsubps %xmm5, %xmm1
    110 ; SSE-NEXT:    addsubps %xmm6, %xmm2
    111 ; SSE-NEXT:    addsubps %xmm7, %xmm3
    112 ; SSE-NEXT:    retq
    113 ;
    114 ; AVX1-LABEL: test5:
    115 ; AVX1:       # BB#0:
    116 ; AVX1-NEXT:    vaddsubps %ymm2, %ymm0, %ymm0
    117 ; AVX1-NEXT:    vaddsubps %ymm3, %ymm1, %ymm1
    118 ; AVX1-NEXT:    retq
    119 ;
    120 ; AVX512-LABEL: test5:
    121 ; AVX512:       # BB#0:
    122 ; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm2
    123 ; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm0
    124 ; AVX512-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[0,2],zmm2[1,3],zmm0[4,6],zmm2[5,7],zmm0[8,10],zmm2[9,11],zmm0[12,14],zmm2[13,15]
    125 ; AVX512-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15]
    126 ; AVX512-NEXT:    retq
        ; 512-bit float case with both operands in registers.
        ; Even lanes are taken from %sub (A - B) and odd lanes from %add (A + B).
        ; The checks above expect four xmm addsubps on the SSE3 run and two ymm
        ; vaddsubps on the plain AVX run. On the avx512f run no addsub is
        ; expected: the checks pin separate zmm vaddps/vsubps followed by a
        ; vshufps and a vpermilps that interleave the two results.
    127   %add = fadd <16 x float> %A, %B
    128   %sub = fsub <16 x float> %A, %B
    129   %vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    130   ret <16 x float> %vecinit2
    131 }
    132 
    133 define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
    134 ; SSE-LABEL: test6:
    135 ; SSE:       # BB#0:
    136 ; SSE-NEXT:    addsubpd %xmm4, %xmm0
    137 ; SSE-NEXT:    addsubpd %xmm5, %xmm1
    138 ; SSE-NEXT:    addsubpd %xmm6, %xmm2
    139 ; SSE-NEXT:    addsubpd %xmm7, %xmm3
    140 ; SSE-NEXT:    retq
    141 ;
    142 ; AVX1-LABEL: test6:
    143 ; AVX1:       # BB#0:
    144 ; AVX1-NEXT:    vaddsubpd %ymm2, %ymm0, %ymm0
    145 ; AVX1-NEXT:    vaddsubpd %ymm3, %ymm1, %ymm1
    146 ; AVX1-NEXT:    retq
    147 ;
    148 ; AVX512-LABEL: test6:
    149 ; AVX512:       # BB#0:
    150 ; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm2
    151 ; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0
    152 ; AVX512-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm2[1],zmm0[2],zmm2[3],zmm0[4],zmm2[5],zmm0[6],zmm2[7]
    153 ; AVX512-NEXT:    retq
        ; 512-bit double case with both operands in registers.
        ; Even lanes are taken from %sub (A - B) and odd lanes from %add (A + B).
        ; The checks above expect four xmm addsubpd on the SSE3 run and two ymm
        ; vaddsubpd on the plain AVX run. On the avx512f run no addsub is
        ; expected: the checks pin separate zmm vaddpd/vsubpd blended by a
        ; single vshufpd.
    154   %add = fadd <8 x double> %A, %B
    155   %sub = fsub <8 x double> %A, %B
    156   %vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
    157   ret <8 x double> %vecinit2
    158 }
    159 
    160 define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
    161 ; SSE-LABEL: test1b:
    162 ; SSE:       # BB#0:
    163 ; SSE-NEXT:    addsubps (%rdi), %xmm0
    164 ; SSE-NEXT:    retq
    165 ;
    166 ; AVX-LABEL: test1b:
    167 ; AVX:       # BB#0:
    168 ; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
    169 ; AVX-NEXT:    retq
        ; Memory-operand variant of the 128-bit float case: the second operand
        ; is loaded through %B. Lanes 0 and 2 come from %sub, lanes 1 and 3
        ; from %add. The checks above expect the load to be folded into the
        ; (v)addsubps as a (%rdi) memory operand.
    170   %1 = load <4 x float>, <4 x float>* %B
    171   %add = fadd <4 x float> %A, %1
    172   %sub = fsub <4 x float> %A, %1
    173   %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    174   ret <4 x float> %vecinit6
    175 }
    176 
    177 define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
    178 ; SSE-LABEL: test2b:
    179 ; SSE:       # BB#0:
    180 ; SSE-NEXT:    addsubps (%rdi), %xmm0
    181 ; SSE-NEXT:    addsubps 16(%rdi), %xmm1
    182 ; SSE-NEXT:    retq
    183 ;
    184 ; AVX-LABEL: test2b:
    185 ; AVX:       # BB#0:
    186 ; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
    187 ; AVX-NEXT:    retq
        ; Memory-operand variant of the 256-bit float case: the second operand
        ; is loaded through %B. Even lanes come from %sub, odd lanes from %add.
        ; The checks above expect the load to be folded: two addsubps reading
        ; (%rdi) and 16(%rdi) on the SSE3 run, one ymm vaddsubps on the AVX runs.
    188   %1 = load <8 x float>, <8 x float>* %B
    189   %add = fadd <8 x float> %A, %1
    190   %sub = fsub <8 x float> %A, %1
    191   %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
    192   ret <8 x float> %vecinit14
    193 }
    194 
    195 define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
    196 ; SSE-LABEL: test3b:
    197 ; SSE:       # BB#0:
    198 ; SSE-NEXT:    addsubpd (%rdi), %xmm0
    199 ; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
    200 ; SSE-NEXT:    retq
    201 ;
    202 ; AVX-LABEL: test3b:
    203 ; AVX:       # BB#0:
    204 ; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
    205 ; AVX-NEXT:    retq
        ; Memory-operand variant of the 256-bit double case: the second operand
        ; is loaded through %B. Lanes 0 and 2 come from %sub, lanes 1 and 3
        ; from %add. The checks above expect the load to be folded: two
        ; addsubpd reading (%rdi) and 16(%rdi) on the SSE3 run, one ymm
        ; vaddsubpd on the AVX runs.
    206   %1 = load <4 x double>, <4 x double>* %B
    207   %add = fadd <4 x double> %A, %1
    208   %sub = fsub <4 x double> %A, %1
    209   %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    210   ret <4 x double> %vecinit6
    211 }
    212 
    213 define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
    214 ; SSE-LABEL: test4b:
    215 ; SSE:       # BB#0:
    216 ; SSE-NEXT:    addsubpd (%rdi), %xmm0
    217 ; SSE-NEXT:    retq
    218 ;
    219 ; AVX-LABEL: test4b:
    220 ; AVX:       # BB#0:
    221 ; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
    222 ; AVX-NEXT:    retq
        ; Memory-operand variant of the 128-bit double case: the second operand
        ; is loaded through %B. Lane 0 comes from %sub and lane 1 from %add.
        ; The checks above expect the load to be folded into the (v)addsubpd
        ; as a (%rdi) memory operand.
    223   %1 = load <2 x double>, <2 x double>* %B
    224   %sub = fsub <2 x double> %A, %1
    225   %add = fadd <2 x double> %A, %1
    226   %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
    227   ret <2 x double> %vecinit2
    228 }
    229 
    230 define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
    231 ; SSE-LABEL: test1c:
    232 ; SSE:       # BB#0:
    233 ; SSE-NEXT:    addsubps (%rdi), %xmm0
    234 ; SSE-NEXT:    retq
    235 ;
    236 ; AVX-LABEL: test1c:
    237 ; AVX:       # BB#0:
    238 ; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
    239 ; AVX-NEXT:    retq
        ; Commuted-shuffle variant of test1b: %add is the first shuffle operand
        ; and %sub the second, with mask <4,1,6,3>. That still places %sub in
        ; lanes 0 and 2 and %add in lanes 1 and 3, so the checks above expect
        ; the same folded (v)addsubps as test1b.
    240   %1 = load <4 x float>, <4 x float>* %B
    241   %add = fadd <4 x float> %A, %1
    242   %sub = fsub <4 x float> %A, %1
    243   %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
    244   ret <4 x float> %vecinit6
    245 }
    246 
    247 define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
    248 ; SSE-LABEL: test2c:
    249 ; SSE:       # BB#0:
    250 ; SSE-NEXT:    addsubps (%rdi), %xmm0
    251 ; SSE-NEXT:    addsubps 16(%rdi), %xmm1
    252 ; SSE-NEXT:    retq
    253 ;
    254 ; AVX-LABEL: test2c:
    255 ; AVX:       # BB#0:
    256 ; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
    257 ; AVX-NEXT:    retq
        ; Commuted-shuffle variant of test2b: %add is the first shuffle operand
        ; and %sub the second, with mask <8,1,10,3,12,5,14,7>. That still places
        ; %sub in the even lanes and %add in the odd lanes, so the checks above
        ; expect the same folded (v)addsubps sequence as test2b.
    258   %1 = load <8 x float>, <8 x float>* %B
    259   %add = fadd <8 x float> %A, %1
    260   %sub = fsub <8 x float> %A, %1
    261   %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
    262   ret <8 x float> %vecinit14
    263 }
    264 
    265 define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
    266 ; SSE-LABEL: test3c:
    267 ; SSE:       # BB#0:
    268 ; SSE-NEXT:    addsubpd (%rdi), %xmm0
    269 ; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
    270 ; SSE-NEXT:    retq
    271 ;
    272 ; AVX-LABEL: test3c:
    273 ; AVX:       # BB#0:
    274 ; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
    275 ; AVX-NEXT:    retq
        ; Commuted-shuffle variant of test3b: %add is the first shuffle operand
        ; and %sub the second, with mask <4,1,6,3>. That still places %sub in
        ; lanes 0 and 2 and %add in lanes 1 and 3, so the checks above expect
        ; the same folded (v)addsubpd sequence as test3b.
    276   %1 = load <4 x double>, <4 x double>* %B
    277   %add = fadd <4 x double> %A, %1
    278   %sub = fsub <4 x double> %A, %1
    279   %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
    280   ret <4 x double> %vecinit6
    281 }
    282 
    283 define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
    284 ; SSE-LABEL: test4c:
    285 ; SSE:       # BB#0:
    286 ; SSE-NEXT:    addsubpd (%rdi), %xmm0
    287 ; SSE-NEXT:    retq
    288 ;
    289 ; AVX-LABEL: test4c:
    290 ; AVX:       # BB#0:
    291 ; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
    292 ; AVX-NEXT:    retq
        ; Commuted-shuffle variant of test4b: %add is the first shuffle operand
        ; and %sub the second, with mask <2,1>. That still places %sub in lane 0
        ; and %add in lane 1, so the checks above expect the same folded
        ; (v)addsubpd as test4b.
    293   %1 = load <2 x double>, <2 x double>* %B
    294   %sub = fsub <2 x double> %A, %1
    295   %add = fadd <2 x double> %A, %1
    296   %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
    297   ret <2 x double> %vecinit2
    298 }
    299