Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
      3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
      4 
      5 define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
      6 ; SSE3-LABEL: haddpd1:
      7 ; SSE3:       # BB#0:
      8 ; SSE3-NEXT:    haddpd %xmm1, %xmm0
      9 ; SSE3-NEXT:    retq
     10 ;
     11 ; AVX-LABEL: haddpd1:
     12 ; AVX:       # BB#0:
     13 ; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
     14 ; AVX-NEXT:    retq
          ; Canonical two-input pattern: %a gathers the even lanes <x[0], y[0]> and
          ; %b the odd lanes <x[1], y[1]>, so the fadd produces
          ; <x[0]+x[1], y[0]+y[1]>. The assertions above expect a single
          ; horizontal-add instruction for this.
     15   %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
     16   %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
     17   %r = fadd <2 x double> %a, %b
     18   ret <2 x double> %r
     19 }
     20 
     21 define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) {
     22 ; SSE3-LABEL: haddpd2:
     23 ; SSE3:       # BB#0:
     24 ; SSE3-NEXT:    haddpd %xmm1, %xmm0
     25 ; SSE3-NEXT:    retq
     26 ;
     27 ; AVX-LABEL: haddpd2:
     28 ; AVX:       # BB#0:
     29 ; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
     30 ; AVX-NEXT:    retq
          ; Commuted variant: %a is <x[1], y[0]>, and %b (whose shuffle takes %y as
          ; its first source and %x as its second) is <x[0], y[1]>. The fadd gives
          ; <x[1]+x[0], y[0]+y[1]>, i.e. the same pairs as a horizontal add with
          ; the addends of lane 0 swapped. The same single instruction is expected.
     31   %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2>
     32   %b = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 2, i32 1>
     33   %r = fadd <2 x double> %a, %b
     34   ret <2 x double> %r
     35 }
     36 
     37 define <2 x double> @haddpd3(<2 x double> %x) {
     38 ; SSE3-LABEL: haddpd3:
     39 ; SSE3:       # BB#0:
     40 ; SSE3-NEXT:    haddpd %xmm0, %xmm0
     41 ; SSE3-NEXT:    retq
     42 ;
     43 ; AVX-LABEL: haddpd3:
     44 ; AVX:       # BB#0:
     45 ; AVX-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
     46 ; AVX-NEXT:    retq
          ; Single-input pattern with an undef second shuffle source and an undef
          ; mask element in lane 1: only lane 0 (x[0]+x[1]) is defined. The
          ; assertions expect a horizontal add of %x with itself.
     47   %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
     48   %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
     49   %r = fadd <2 x double> %a, %b
     50   ret <2 x double> %r
     51 }
     52 
     53 define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) {
     54 ; SSE3-LABEL: haddps1:
     55 ; SSE3:       # BB#0:
     56 ; SSE3-NEXT:    haddps %xmm1, %xmm0
     57 ; SSE3-NEXT:    retq
     58 ;
     59 ; AVX-LABEL: haddps1:
     60 ; AVX:       # BB#0:
     61 ; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
     62 ; AVX-NEXT:    retq
          ; Canonical two-input pattern for four floats: %a is
          ; <x[0], x[2], y[0], y[2]> and %b is <x[1], x[3], y[1], y[3]>, so the
          ; fadd produces <x[0]+x[1], x[2]+x[3], y[0]+y[1], y[2]+y[3]>. A single
          ; horizontal-add instruction is expected.
     63   %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
     64   %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
     65   %r = fadd <4 x float> %a, %b
     66   ret <4 x float> %r
     67 }
     68 
     69 define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) {
     70 ; SSE3-LABEL: haddps2:
     71 ; SSE3:       # BB#0:
     72 ; SSE3-NEXT:    haddps %xmm1, %xmm0
     73 ; SSE3-NEXT:    retq
     74 ;
     75 ; AVX-LABEL: haddps2:
     76 ; AVX:       # BB#0:
     77 ; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
     78 ; AVX-NEXT:    retq
          ; Commuted variant: %a is <x[1], x[2], y[1], y[2]>; %b swaps its shuffle
          ; sources (%y first, %x second) and is <x[0], x[3], y[0], y[3]>. The fadd
          ; gives <x[1]+x[0], x[2]+x[3], y[1]+y[0], y[2]+y[3]>, the same adjacent
          ; pairs as haddps1 with some addends swapped. The same single
          ; instruction is expected.
     79   %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
     80   %b = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
     81   %r = fadd <4 x float> %a, %b
     82   ret <4 x float> %r
     83 }
     84 
     85 define <4 x float> @haddps3(<4 x float> %x) {
     86 ; SSE3-LABEL: haddps3:
     87 ; SSE3:       # BB#0:
     88 ; SSE3-NEXT:    haddps %xmm0, %xmm0
     89 ; SSE3-NEXT:    retq
     90 ;
     91 ; AVX-LABEL: haddps3:
     92 ; AVX:       # BB#0:
     93 ; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
     94 ; AVX-NEXT:    retq
          ; Single-input pattern: lane 0 has an undef mask element and lanes 2-3
          ; select from the undef second source (indices 4-7), so only lane 1
          ; (x[2]+x[3]) is built from %x. The assertions expect a horizontal add
          ; of %x with itself.
     95   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
     96   %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
     97   %r = fadd <4 x float> %a, %b
     98   ret <4 x float> %r
     99 }
    100 
    101 define <4 x float> @haddps4(<4 x float> %x) {
    102 ; SSE3-LABEL: haddps4:
    103 ; SSE3:       # BB#0:
    104 ; SSE3-NEXT:    haddps %xmm0, %xmm0
    105 ; SSE3-NEXT:    retq
    106 ;
    107 ; AVX-LABEL: haddps4:
    108 ; AVX:       # BB#0:
    109 ; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
    110 ; AVX-NEXT:    retq
          ; Single-input pattern with the upper two mask elements undef: lanes 0
          ; and 1 are x[0]+x[1] and x[2]+x[3]; lanes 2-3 are undefined. A
          ; horizontal add of %x with itself is expected.
    111   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
    112   %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
    113   %r = fadd <4 x float> %a, %b
    114   ret <4 x float> %r
    115 }
    116 
    117 define <4 x float> @haddps5(<4 x float> %x) {
    118 ; SSE3-LABEL: haddps5:
    119 ; SSE3:       # BB#0:
    120 ; SSE3-NEXT:    haddps %xmm0, %xmm0
    121 ; SSE3-NEXT:    retq
    122 ;
    123 ; AVX-LABEL: haddps5:
    124 ; AVX:       # BB#0:
    125 ; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
    126 ; AVX-NEXT:    retq
          ; Like haddps4, but lane 1 has its addends swapped between the two
          ; shuffles: lane 0 is x[0]+x[1] and lane 1 is x[3]+x[2]. Lanes 2-3 are
          ; undefined. The same horizontal add is expected.
    127   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
    128   %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
    129   %r = fadd <4 x float> %a, %b
    130   ret <4 x float> %r
    131 }
    132 
    133 define <4 x float> @haddps6(<4 x float> %x) {
    134 ; SSE3-LABEL: haddps6:
    135 ; SSE3:       # BB#0:
    136 ; SSE3-NEXT:    haddps %xmm0, %xmm0
    137 ; SSE3-NEXT:    retq
    138 ;
    139 ; AVX-LABEL: haddps6:
    140 ; AVX:       # BB#0:
    141 ; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
    142 ; AVX-NEXT:    retq
          ; Only lane 0 is defined (x[0]+x[1]); every other mask element is undef.
          ; A horizontal add of %x with itself is still expected.
    143   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
    144   %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    145   %r = fadd <4 x float> %a, %b
    146   ret <4 x float> %r
    147 }
    148 
    149 define <4 x float> @haddps7(<4 x float> %x) {
    150 ; SSE3-LABEL: haddps7:
    151 ; SSE3:       # BB#0:
    152 ; SSE3-NEXT:    haddps %xmm0, %xmm0
    153 ; SSE3-NEXT:    retq
    154 ;
    155 ; AVX-LABEL: haddps7:
    156 ; AVX:       # BB#0:
    157 ; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
    158 ; AVX-NEXT:    retq
          ; Only lane 1 is defined, and with swapped addends (x[3]+x[2]); every
          ; other mask element is undef. A horizontal add of %x with itself is
          ; expected.
    159   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
    160   %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
    161   %r = fadd <4 x float> %a, %b
    162   ret <4 x float> %r
    163 }
    164 
    165 define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) {
    166 ; SSE3-LABEL: hsubpd1:
    167 ; SSE3:       # BB#0:
    168 ; SSE3-NEXT:    hsubpd %xmm1, %xmm0
    169 ; SSE3-NEXT:    retq
    170 ;
    171 ; AVX-LABEL: hsubpd1:
    172 ; AVX:       # BB#0:
    173 ; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
    174 ; AVX-NEXT:    retq
          ; Canonical two-input subtract pattern: %a is <x[0], y[0]> and %b is
          ; <x[1], y[1]>, so the fsub produces <x[0]-x[1], y[0]-y[1]>. A single
          ; horizontal-subtract instruction is expected.
    175   %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
    176   %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
    177   %r = fsub <2 x double> %a, %b
    178   ret <2 x double> %r
    179 }
    180 
    181 define <2 x double> @hsubpd2(<2 x double> %x) {
    182 ; SSE3-LABEL: hsubpd2:
    183 ; SSE3:       # BB#0:
    184 ; SSE3-NEXT:    hsubpd %xmm0, %xmm0
    185 ; SSE3-NEXT:    retq
    186 ;
    187 ; AVX-LABEL: hsubpd2:
    188 ; AVX:       # BB#0:
    189 ; AVX-NEXT:    vhsubpd %xmm0, %xmm0, %xmm0
    190 ; AVX-NEXT:    retq
          ; Single-input subtract pattern with lane 1 undef: only lane 0
          ; (x[0]-x[1]) is defined. A horizontal subtract of %x with itself is
          ; expected.
    191   %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
    192   %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
    193   %r = fsub <2 x double> %a, %b
    194   ret <2 x double> %r
    195 }
    196 
    197 define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) {
    198 ; SSE3-LABEL: hsubps1:
    199 ; SSE3:       # BB#0:
    200 ; SSE3-NEXT:    hsubps %xmm1, %xmm0
    201 ; SSE3-NEXT:    retq
    202 ;
    203 ; AVX-LABEL: hsubps1:
    204 ; AVX:       # BB#0:
    205 ; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
    206 ; AVX-NEXT:    retq
          ; Canonical two-input subtract pattern for four floats: %a holds the
          ; even lanes and %b the odd lanes of %x then %y, so the fsub produces
          ; <x[0]-x[1], x[2]-x[3], y[0]-y[1], y[2]-y[3]>. A single
          ; horizontal-subtract instruction is expected.
    207   %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    208   %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    209   %r = fsub <4 x float> %a, %b
    210   ret <4 x float> %r
    211 }
    212 
    213 define <4 x float> @hsubps2(<4 x float> %x) {
    214 ; SSE3-LABEL: hsubps2:
    215 ; SSE3:       # BB#0:
    216 ; SSE3-NEXT:    hsubps %xmm0, %xmm0
    217 ; SSE3-NEXT:    retq
    218 ;
    219 ; AVX-LABEL: hsubps2:
    220 ; AVX:       # BB#0:
    221 ; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
    222 ; AVX-NEXT:    retq
          ; Single-input subtract pattern: lane 0 has an undef mask element and
          ; lanes 2-3 select from the undef second source (indices 4-7), so only
          ; lane 1 (x[2]-x[3]) is built from %x. A horizontal subtract of %x with
          ; itself is expected.
    223   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
    224   %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
    225   %r = fsub <4 x float> %a, %b
    226   ret <4 x float> %r
    227 }
    228 
    229 define <4 x float> @hsubps3(<4 x float> %x) {
    230 ; SSE3-LABEL: hsubps3:
    231 ; SSE3:       # BB#0:
    232 ; SSE3-NEXT:    hsubps %xmm0, %xmm0
    233 ; SSE3-NEXT:    retq
    234 ;
    235 ; AVX-LABEL: hsubps3:
    236 ; AVX:       # BB#0:
    237 ; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
    238 ; AVX-NEXT:    retq
          ; Single-input subtract pattern with the upper two mask elements undef:
          ; lanes 0 and 1 are x[0]-x[1] and x[2]-x[3]; lanes 2-3 are undefined. A
          ; horizontal subtract of %x with itself is expected.
    239   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
    240   %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
    241   %r = fsub <4 x float> %a, %b
    242   ret <4 x float> %r
    243 }
    244 
    245 define <4 x float> @hsubps4(<4 x float> %x) {
    246 ; SSE3-LABEL: hsubps4:
    247 ; SSE3:       # BB#0:
    248 ; SSE3-NEXT:    hsubps %xmm0, %xmm0
    249 ; SSE3-NEXT:    retq
    250 ;
    251 ; AVX-LABEL: hsubps4:
    252 ; AVX:       # BB#0:
    253 ; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
    254 ; AVX-NEXT:    retq
          ; Only lane 0 is defined (x[0]-x[1]); every other mask element is undef.
          ; A horizontal subtract of %x with itself is still expected.
    255   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
    256   %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    257   %r = fsub <4 x float> %a, %b
    258   ret <4 x float> %r
    259 }
    260 
    261 define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
    262 ; SSE3-LABEL: vhaddps1:
    263 ; SSE3:       # BB#0:
    264 ; SSE3-NEXT:    haddps %xmm2, %xmm0
    265 ; SSE3-NEXT:    haddps %xmm3, %xmm1
    266 ; SSE3-NEXT:    retq
    267 ;
    268 ; AVX-LABEL: vhaddps1:
    269 ; AVX:       # BB#0:
    270 ; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
    271 ; AVX-NEXT:    retq
          ; Eight-float pattern laid out per group of four lanes: result lanes 0-3
          ; are <x[0]+x[1], x[2]+x[3], y[0]+y[1], y[2]+y[3]> and lanes 4-7 are
          ; <x[4]+x[5], x[6]+x[7], y[4]+y[5], y[6]+y[7]>. The assertions expect
          ; two xmm horizontal adds in the SSE3 run and one ymm horizontal add in
          ; the AVX run.
    272   %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
    273   %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
    274   %r = fadd <8 x float> %a, %b
    275   ret <8 x float> %r
    276 }
    277 
    278 define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
    279 ; SSE3-LABEL: vhaddps2:
    280 ; SSE3:       # BB#0:
    281 ; SSE3-NEXT:    haddps %xmm2, %xmm0
    282 ; SSE3-NEXT:    haddps %xmm3, %xmm1
    283 ; SSE3-NEXT:    retq
    284 ;
    285 ; AVX-LABEL: vhaddps2:
    286 ; AVX:       # BB#0:
    287 ; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
    288 ; AVX-NEXT:    retq
          ; Commuted variant of vhaddps1: %a is
          ; <x[1], x[2], y[1], y[2], x[5], x[6], y[5], y[6]>; %b swaps its shuffle
          ; sources (%y first, %x second) and is
          ; <x[0], x[3], y[0], y[3], x[4], x[7], y[4], y[7]>. Each lane still sums
          ; one adjacent pair, with some addends swapped, and the same
          ; instructions as vhaddps1 are expected.
    289   %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
    290   %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
    291   %r = fadd <8 x float> %a, %b
    292   ret <8 x float> %r
    293 }
    294 
    295 define <8 x float> @vhaddps3(<8 x float> %x) {
    296 ; SSE3-LABEL: vhaddps3:
    297 ; SSE3:       # BB#0:
    298 ; SSE3-NEXT:    haddps %xmm0, %xmm0
    299 ; SSE3-NEXT:    haddps %xmm1, %xmm1
    300 ; SSE3-NEXT:    retq
    301 ;
    302 ; AVX-LABEL: vhaddps3:
    303 ; AVX:       # BB#0:
    304 ; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
    305 ; AVX-NEXT:    retq
          ; Single-input eight-float pattern with undefs mixed in: lane 0 of %a and
          ; lane 3 / lane 6 mask elements are undef, and indices 8-15 select from
          ; the undef second source. Lanes 1, 4 and 5 (x[2]+x[3], x[4]+x[5],
          ; x[6]+x[7]) are fully built from %x. The assertions expect horizontal
          ; adds of the input with itself.
    306   %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
    307   %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
    308   %r = fadd <8 x float> %a, %b
    309   ret <8 x float> %r
    310 }
    311 
    312 define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
    313 ; SSE3-LABEL: vhsubps1:
    314 ; SSE3:       # BB#0:
    315 ; SSE3-NEXT:    hsubps %xmm2, %xmm0
    316 ; SSE3-NEXT:    hsubps %xmm3, %xmm1
    317 ; SSE3-NEXT:    retq
    318 ;
    319 ; AVX-LABEL: vhsubps1:
    320 ; AVX:       # BB#0:
    321 ; AVX-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
    322 ; AVX-NEXT:    retq
          ; Eight-float subtract pattern using the same masks as vhaddps1: result
          ; lanes 0-3 are <x[0]-x[1], x[2]-x[3], y[0]-y[1], y[2]-y[3]> and lanes
          ; 4-7 are <x[4]-x[5], x[6]-x[7], y[4]-y[5], y[6]-y[7]>. The assertions
          ; expect two xmm horizontal subtracts in the SSE3 run and one ymm
          ; horizontal subtract in the AVX run.
    323   %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
    324   %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
    325   %r = fsub <8 x float> %a, %b
    326   ret <8 x float> %r
    327 }
    328 
    329 define <8 x float> @vhsubps3(<8 x float> %x) {
    330 ; SSE3-LABEL: vhsubps3:
    331 ; SSE3:       # BB#0:
    332 ; SSE3-NEXT:    hsubps %xmm0, %xmm0
    333 ; SSE3-NEXT:    hsubps %xmm1, %xmm1
    334 ; SSE3-NEXT:    retq
    335 ;
    336 ; AVX-LABEL: vhsubps3:
    337 ; AVX:       # BB#0:
    338 ; AVX-NEXT:    vhsubps %ymm0, %ymm0, %ymm0
    339 ; AVX-NEXT:    retq
          ; Single-input eight-float subtract pattern with the same masks as
          ; vhaddps3: undef mask elements and selections from the undef second
          ; source leave lanes 1, 4 and 5 (x[2]-x[3], x[4]-x[5], x[6]-x[7]) as the
          ; ones fully built from %x. The assertions expect horizontal subtracts
          ; of the input with itself.
    340   %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
    341   %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
    342   %r = fsub <8 x float> %a, %b
    343   ret <8 x float> %r
    344 }
    345 
    346 define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
    347 ; SSE3-LABEL: vhaddpd1:
    348 ; SSE3:       # BB#0:
    349 ; SSE3-NEXT:    haddpd %xmm2, %xmm0
    350 ; SSE3-NEXT:    haddpd %xmm3, %xmm1
    351 ; SSE3-NEXT:    retq
    352 ;
    353 ; AVX-LABEL: vhaddpd1:
    354 ; AVX:       # BB#0:
    355 ; AVX-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
    356 ; AVX-NEXT:    retq
          ; Four-double pattern: %a is <x[0], y[0], x[2], y[2]> and %b is
          ; <x[1], y[1], x[3], y[3]>, so the fadd produces
          ; <x[0]+x[1], y[0]+y[1], x[2]+x[3], y[2]+y[3]>. The assertions expect
          ; two xmm horizontal adds in the SSE3 run and one ymm horizontal add in
          ; the AVX run.
    357   %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    358   %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    359   %r = fadd <4 x double> %a, %b
    360   ret <4 x double> %r
    361 }
    362 
    363 define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
    364 ; SSE3-LABEL: vhsubpd1:
    365 ; SSE3:       # BB#0:
    366 ; SSE3-NEXT:    hsubpd %xmm2, %xmm0
    367 ; SSE3-NEXT:    hsubpd %xmm3, %xmm1
    368 ; SSE3-NEXT:    retq
    369 ;
    370 ; AVX-LABEL: vhsubpd1:
    371 ; AVX:       # BB#0:
    372 ; AVX-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
    373 ; AVX-NEXT:    retq
          ; Four-double subtract pattern using the same masks as vhaddpd1: the
          ; fsub produces <x[0]-x[1], y[0]-y[1], x[2]-x[3], y[2]-y[3]>. The
          ; assertions expect two xmm horizontal subtracts in the SSE3 run and one
          ; ymm horizontal subtract in the AVX run.
    374   %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    375   %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    376   %r = fsub <4 x double> %a, %b
    377   ret <4 x double> %r
    378 }
    379 
    380 define <2 x float> @haddps_v2f32(<4 x float> %v0) {
    381 ; SSE3-LABEL: haddps_v2f32:
    382 ; SSE3:       # BB#0:
    383 ; SSE3-NEXT:    haddps %xmm0, %xmm0
    384 ; SSE3-NEXT:    retq
    385 ;
    386 ; AVX-LABEL: haddps_v2f32:
    387 ; AVX:       # BB#0:
    388 ; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
    389 ; AVX-NEXT:    retq
          ; Scalarized form of the pattern: all four elements of %v0 are
          ; extracted, adjacent pairs are summed with scalar fadds
          ; (v0[0]+v0[1] and v0[2]+v0[3]), and the two sums are inserted into a
          ; <2 x float> result. The assertions expect this to become one
          ; horizontal add of the input with itself.
    390   %v0.0 = extractelement <4 x float> %v0, i32 0
    391   %v0.1 = extractelement <4 x float> %v0, i32 1
    392   %v0.2 = extractelement <4 x float> %v0, i32 2
    393   %v0.3 = extractelement <4 x float> %v0, i32 3
    394   %op0 = fadd float %v0.0, %v0.1
    395   %op1 = fadd float %v0.2, %v0.3
    396   %res0 = insertelement <2 x float> undef, float %op0, i32 0
    397   %res1 = insertelement <2 x float> %res0, float %op1, i32 1
    398   ret <2 x float> %res1
    399 }
    400