; (code-browser page header removed; file lives under test/CodeGen/X86)
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
      5 
      6 ; Test ADDSUB ISel patterns.
      7 
      8 ; Functions below are obtained from the following source:
      9 ;
     10 ; typedef double double2 __attribute__((ext_vector_type(2)));
     11 ; typedef double double4 __attribute__((ext_vector_type(4)));
     12 ; typedef float float4 __attribute__((ext_vector_type(4)));
     13 ; typedef float float8 __attribute__((ext_vector_type(8)));
     14 ;
     15 ; float4 test1(float4 A, float4 B) {
     16 ;   float4 X = A - B;
     17 ;   float4 Y = A + B;
     18 ;   return (float4){X[0], Y[1], X[2], Y[3]};
     19 ; }
     20 ;
     21 ; float8 test2(float8 A, float8 B) {
     22 ;   float8 X = A - B;
     23 ;   float8 Y = A + B;
     24 ;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
     25 ; }
     26 ;
     27 ; double4 test3(double4 A, double4 B) {
     28 ;   double4 X = A - B;
     29 ;   double4 Y = A + B;
     30 ;   return (double4){X[0], Y[1], X[2], Y[3]};
     31 ; }
     32 ;
     33 ; double2 test4(double2 A, double2 B) {
     34 ;   double2 X = A - B;
     35 ;   double2 Y = A + B;
     36 ;   return (double2){X[0], Y[1]};
     37 ; }
     38 
     39 define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
        ; v4f32 baseline: an fsub/fadd pair blended with mask <0,5,2,7> (sub
        ; results in even lanes, add results in odd lanes) must fold to a
        ; single addsubps / vaddsubps.
     40 ; SSE-LABEL: test1:
     41 ; SSE:       # %bb.0:
     42 ; SSE-NEXT:    addsubps %xmm1, %xmm0
     43 ; SSE-NEXT:    retq
     44 ;
     45 ; AVX-LABEL: test1:
     46 ; AVX:       # %bb.0:
     47 ; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
     48 ; AVX-NEXT:    retq
     49   %sub = fsub <4 x float> %A, %B
     50   %add = fadd <4 x float> %A, %B
     51   %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
     52   ret <4 x float> %vecinit6
     53 }
     54 
     55 define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
        ; v8f32: same even-sub/odd-add blend at 256 bits. SSE (128-bit regs)
        ; must split into two addsubps; AVX keeps one ymm vaddsubps.
     56 ; SSE-LABEL: test2:
     57 ; SSE:       # %bb.0:
     58 ; SSE-NEXT:    addsubps %xmm2, %xmm0
     59 ; SSE-NEXT:    addsubps %xmm3, %xmm1
     60 ; SSE-NEXT:    retq
     61 ;
     62 ; AVX-LABEL: test2:
     63 ; AVX:       # %bb.0:
     64 ; AVX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
     65 ; AVX-NEXT:    retq
     66   %sub = fsub <8 x float> %A, %B
     67   %add = fadd <8 x float> %A, %B
     68   %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
     69   ret <8 x float> %vecinit14
     70 }
     71 
     72 define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
        ; v4f64 variant of test2: split into two addsubpd on SSE, single ymm
        ; vaddsubpd on AVX.
     73 ; SSE-LABEL: test3:
     74 ; SSE:       # %bb.0:
     75 ; SSE-NEXT:    addsubpd %xmm2, %xmm0
     76 ; SSE-NEXT:    addsubpd %xmm3, %xmm1
     77 ; SSE-NEXT:    retq
     78 ;
     79 ; AVX-LABEL: test3:
     80 ; AVX:       # %bb.0:
     81 ; AVX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
     82 ; AVX-NEXT:    retq
     83   %sub = fsub <4 x double> %A, %B
     84   %add = fadd <4 x double> %A, %B
     85   %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
     86   ret <4 x double> %vecinit6
     87 }
     88 
     89 define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
        ; v2f64: here the fadd is written before the fsub — the pattern match
        ; must not depend on instruction order in the block.
        ; NOTE(review): references attribute group #0, but no `attributes #0`
        ; definition is visible in this chunk — confirm it exists at end of
        ; file or drop the reference.
     90 ; SSE-LABEL: test4:
     91 ; SSE:       # %bb.0:
     92 ; SSE-NEXT:    addsubpd %xmm1, %xmm0
     93 ; SSE-NEXT:    retq
     94 ;
     95 ; AVX-LABEL: test4:
     96 ; AVX:       # %bb.0:
     97 ; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
     98 ; AVX-NEXT:    retq
     99   %add = fadd <2 x double> %A, %B
    100   %sub = fsub <2 x double> %A, %B
    101   %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
    102   ret <2 x double> %vecinit2
    103 }
    104 
    105 define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
        ; v16f32: no 512-bit addsub instruction exists, so each target lowers
        ; differently — four 128-bit addsubps on SSE, two 256-bit vaddsubps on
        ; AVX1, and on AVX512F a vsubps followed by a merge-masked vaddps whose
        ; mask 0xAAAA (binary 1010...1010) overwrites exactly the odd lanes.
    106 ; SSE-LABEL: test5:
    107 ; SSE:       # %bb.0:
    108 ; SSE-NEXT:    addsubps %xmm4, %xmm0
    109 ; SSE-NEXT:    addsubps %xmm5, %xmm1
    110 ; SSE-NEXT:    addsubps %xmm6, %xmm2
    111 ; SSE-NEXT:    addsubps %xmm7, %xmm3
    112 ; SSE-NEXT:    retq
    113 ;
    114 ; AVX1-LABEL: test5:
    115 ; AVX1:       # %bb.0:
    116 ; AVX1-NEXT:    vaddsubps %ymm2, %ymm0, %ymm0
    117 ; AVX1-NEXT:    vaddsubps %ymm3, %ymm1, %ymm1
    118 ; AVX1-NEXT:    retq
    119 ;
    120 ; AVX512-LABEL: test5:
    121 ; AVX512:       # %bb.0:
    122 ; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm2
    123 ; AVX512-NEXT:    movw $-21846, %ax # imm = 0xAAAA
    124 ; AVX512-NEXT:    kmovw %eax, %k1
    125 ; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1}
    126 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    127 ; AVX512-NEXT:    retq
    128   %add = fadd <16 x float> %A, %B
    129   %sub = fsub <16 x float> %A, %B
    130   %vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    131   ret <16 x float> %vecinit2
    132 }
    133 
    134 define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
        ; v8f64 counterpart of test5. On AVX512 this one lowers to explicit
        ; vaddpd + vsubpd combined by a vshufpd interleaving sub (even lanes)
        ; with add (odd lanes), rather than the masked-add form used for f32.
    135 ; SSE-LABEL: test6:
    136 ; SSE:       # %bb.0:
    137 ; SSE-NEXT:    addsubpd %xmm4, %xmm0
    138 ; SSE-NEXT:    addsubpd %xmm5, %xmm1
    139 ; SSE-NEXT:    addsubpd %xmm6, %xmm2
    140 ; SSE-NEXT:    addsubpd %xmm7, %xmm3
    141 ; SSE-NEXT:    retq
    142 ;
    143 ; AVX1-LABEL: test6:
    144 ; AVX1:       # %bb.0:
    145 ; AVX1-NEXT:    vaddsubpd %ymm2, %ymm0, %ymm0
    146 ; AVX1-NEXT:    vaddsubpd %ymm3, %ymm1, %ymm1
    147 ; AVX1-NEXT:    retq
    148 ;
    149 ; AVX512-LABEL: test6:
    150 ; AVX512:       # %bb.0:
    151 ; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm2
    152 ; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0
    153 ; AVX512-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm2[1],zmm0[2],zmm2[3],zmm0[4],zmm2[5],zmm0[6],zmm2[7]
    154 ; AVX512-NEXT:    retq
    155   %add = fadd <8 x double> %A, %B
    156   %sub = fsub <8 x double> %A, %B
    157   %vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
    158   ret <8 x double> %vecinit2
    159 }
    160 
    161 define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
        ; Memory-operand variant of test1: %B is loaded from memory, and the
        ; load must fold into the addsub as an (%rdi) operand.
    162 ; SSE-LABEL: test1b:
    163 ; SSE:       # %bb.0:
    164 ; SSE-NEXT:    addsubps (%rdi), %xmm0
    165 ; SSE-NEXT:    retq
    166 ;
    167 ; AVX-LABEL: test1b:
    168 ; AVX:       # %bb.0:
    169 ; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
    170 ; AVX-NEXT:    retq
    171   %1 = load <4 x float>, <4 x float>* %B
    172   %add = fadd <4 x float> %A, %1
    173   %sub = fsub <4 x float> %A, %1
    174   %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    175   ret <4 x float> %vecinit6
    176 }
    177 
    178 define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
        ; 256-bit load-folding variant: SSE folds the two 16-byte halves at
        ; offsets 0 and 16; AVX folds a single 32-byte ymm load.
    179 ; SSE-LABEL: test2b:
    180 ; SSE:       # %bb.0:
    181 ; SSE-NEXT:    addsubps (%rdi), %xmm0
    182 ; SSE-NEXT:    addsubps 16(%rdi), %xmm1
    183 ; SSE-NEXT:    retq
    184 ;
    185 ; AVX-LABEL: test2b:
    186 ; AVX:       # %bb.0:
    187 ; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
    188 ; AVX-NEXT:    retq
    189   %1 = load <8 x float>, <8 x float>* %B
    190   %add = fadd <8 x float> %A, %1
    191   %sub = fsub <8 x float> %A, %1
    192   %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
    193   ret <8 x float> %vecinit14
    194 }
    195 
    196 define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
        ; v4f64 load-folding variant, mirroring test2b with addsubpd.
    197 ; SSE-LABEL: test3b:
    198 ; SSE:       # %bb.0:
    199 ; SSE-NEXT:    addsubpd (%rdi), %xmm0
    200 ; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
    201 ; SSE-NEXT:    retq
    202 ;
    203 ; AVX-LABEL: test3b:
    204 ; AVX:       # %bb.0:
    205 ; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
    206 ; AVX-NEXT:    retq
    207   %1 = load <4 x double>, <4 x double>* %B
    208   %add = fadd <4 x double> %A, %1
    209   %sub = fsub <4 x double> %A, %1
    210   %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    211   ret <4 x double> %vecinit6
    212 }
    213 
    214 define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
        ; v2f64 load-folding variant of test4 (fsub/fadd order as in test1).
    215 ; SSE-LABEL: test4b:
    216 ; SSE:       # %bb.0:
    217 ; SSE-NEXT:    addsubpd (%rdi), %xmm0
    218 ; SSE-NEXT:    retq
    219 ;
    220 ; AVX-LABEL: test4b:
    221 ; AVX:       # %bb.0:
    222 ; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
    223 ; AVX-NEXT:    retq
    224   %1 = load <2 x double>, <2 x double>* %B
    225   %sub = fsub <2 x double> %A, %1
    226   %add = fadd <2 x double> %A, %1
    227   %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
    228   ret <2 x double> %vecinit2
    229 }
    230 
    231 define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
        ; Commuted-shuffle variant of test1b: %add is the FIRST shuffle
        ; operand and the mask <4,1,6,3> pulls sub results into the even
        ; lanes — the same addsub must still be selected.
    232 ; SSE-LABEL: test1c:
    233 ; SSE:       # %bb.0:
    234 ; SSE-NEXT:    addsubps (%rdi), %xmm0
    235 ; SSE-NEXT:    retq
    236 ;
    237 ; AVX-LABEL: test1c:
    238 ; AVX:       # %bb.0:
    239 ; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
    240 ; AVX-NEXT:    retq
    241   %1 = load <4 x float>, <4 x float>* %B
    242   %add = fadd <4 x float> %A, %1
    243   %sub = fsub <4 x float> %A, %1
    244   %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
    245   ret <4 x float> %vecinit6
    246 }
    247 
    248 define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
        ; v8f32 commuted-shuffle variant (add first, sub selected for even
        ; lanes by mask <8,1,10,3,12,5,14,7>); codegen matches test2b.
    249 ; SSE-LABEL: test2c:
    250 ; SSE:       # %bb.0:
    251 ; SSE-NEXT:    addsubps (%rdi), %xmm0
    252 ; SSE-NEXT:    addsubps 16(%rdi), %xmm1
    253 ; SSE-NEXT:    retq
    254 ;
    255 ; AVX-LABEL: test2c:
    256 ; AVX:       # %bb.0:
    257 ; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
    258 ; AVX-NEXT:    retq
    259   %1 = load <8 x float>, <8 x float>* %B
    260   %add = fadd <8 x float> %A, %1
    261   %sub = fsub <8 x float> %A, %1
    262   %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
    263   ret <8 x float> %vecinit14
    264 }
    265 
    266 define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
        ; v4f64 commuted-shuffle variant; codegen matches test3b.
    267 ; SSE-LABEL: test3c:
    268 ; SSE:       # %bb.0:
    269 ; SSE-NEXT:    addsubpd (%rdi), %xmm0
    270 ; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
    271 ; SSE-NEXT:    retq
    272 ;
    273 ; AVX-LABEL: test3c:
    274 ; AVX:       # %bb.0:
    275 ; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
    276 ; AVX-NEXT:    retq
    277   %1 = load <4 x double>, <4 x double>* %B
    278   %add = fadd <4 x double> %A, %1
    279   %sub = fsub <4 x double> %A, %1
    280   %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
    281   ret <4 x double> %vecinit6
    282 }
    283 
    284 define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
        ; v2f64 commuted-shuffle variant (mask <2,1>: sub in lane 0, add in
        ; lane 1); codegen matches test4b.
    285 ; SSE-LABEL: test4c:
    286 ; SSE:       # %bb.0:
    287 ; SSE-NEXT:    addsubpd (%rdi), %xmm0
    288 ; SSE-NEXT:    retq
    289 ;
    290 ; AVX-LABEL: test4c:
    291 ; AVX:       # %bb.0:
    292 ; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
    293 ; AVX-NEXT:    retq
    294   %1 = load <2 x double>, <2 x double>* %B
    295   %sub = fsub <2 x double> %A, %1
    296   %add = fadd <2 x double> %A, %1
    297   %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
    298   ret <2 x double> %vecinit2
    299 }
    300