Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86-64 -mattr=+sse3,-avx | FileCheck %s -check-prefix=SSE3
      2 ; RUN: llc < %s -march=x86-64 -mattr=-sse3,+avx | FileCheck %s -check-prefix=AVX
      3 
      4 ; SSE3: haddpd1:
      5 ; SSE3-NOT: vhaddpd
      6 ; SSE3: haddpd
      7 ; AVX: haddpd1:
      8 ; AVX: vhaddpd
      9 define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
     10   %even = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>  ; [x0, y0]
     11   %odd = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>  ; [x1, y1]
     12   %sum = fadd <2 x double> %even, %odd  ; [x0+x1, y0+y1]
     13   ret <2 x double> %sum
     14 }
     15 
     16 ; SSE3: haddpd2:
     17 ; SSE3-NOT: vhaddpd
     18 ; SSE3: haddpd
     19 ; AVX: haddpd2:
     20 ; AVX: vhaddpd
     21 define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) {
     22   %lhs = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2>  ; [x1, y0]
     23   %rhs = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 2, i32 1>  ; [x0, y1] (operands are swapped here)
     24   %sum = fadd <2 x double> %lhs, %rhs  ; [x1+x0, y0+y1]
     25   ret <2 x double> %sum
     26 }
     27 
     28 ; SSE3: haddpd3:
     29 ; SSE3-NOT: vhaddpd
     30 ; SSE3: haddpd
     31 ; AVX: haddpd3:
     32 ; AVX: vhaddpd
     33 define <2 x double> @haddpd3(<2 x double> %x) {
     34   %even = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>  ; [x0, undef]
     35   %odd = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>  ; [x1, undef]
     36   %sum = fadd <2 x double> %even, %odd  ; only lane 0 (x0+x1) is defined
     37   ret <2 x double> %sum
     38 }
     39 
     40 ; SSE3: haddps1:
     41 ; SSE3-NOT: vhaddps
     42 ; SSE3: haddps
     43 ; AVX: haddps1:
     44 ; AVX: vhaddps
     45 define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) {
     46   %even = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>  ; [x0, x2, y0, y2]
     47   %odd = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>  ; [x1, x3, y1, y3]
     48   %sum = fadd <4 x float> %even, %odd  ; [x0+x1, x2+x3, y0+y1, y2+y3]
     49   ret <4 x float> %sum
     50 }
     51 
     52 ; SSE3: haddps2:
     53 ; SSE3-NOT: vhaddps
     54 ; SSE3: haddps
     55 ; AVX: haddps2:
     56 ; AVX: vhaddps
     57 define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) {
     58   %lhs = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>  ; [x1, x2, y1, y2]
     59   %rhs = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>  ; [x0, x3, y0, y3] (operands are swapped here)
     60   %sum = fadd <4 x float> %lhs, %rhs  ; [x1+x0, x2+x3, y1+y0, y2+y3]
     61   ret <4 x float> %sum
     62 }
     63 
     64 ; SSE3: haddps3:
     65 ; SSE3-NOT: vhaddps
     66 ; SSE3: haddps
     67 ; AVX: haddps3:
     68 ; AVX: vhaddps
     69 define <4 x float> @haddps3(<4 x float> %x) {
     70   %even = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>  ; [undef, x2, undef, undef] (indices 4 and 6 select from the undef operand)
     71   %odd = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>  ; [undef, x3, undef, undef] (indices 5 and 7 select from the undef operand)
     72   %sum = fadd <4 x float> %even, %odd  ; only lane 1 (x2+x3) is defined
     73   ret <4 x float> %sum
     74 }
     75 
     76 ; SSE3: haddps4:
     77 ; SSE3-NOT: vhaddps
     78 ; SSE3: haddps
     79 ; AVX: haddps4:
     80 ; AVX: vhaddps
     81 define <4 x float> @haddps4(<4 x float> %x) {
     82   %even = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>  ; [x0, x2, undef, undef]
     83   %odd = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>  ; [x1, x3, undef, undef]
     84   %sum = fadd <4 x float> %even, %odd  ; [x0+x1, x2+x3, undef, undef]
     85   ret <4 x float> %sum
     86 }
     87 
     88 ; SSE3: haddps5:
     89 ; SSE3-NOT: vhaddps
     90 ; SSE3: haddps
     91 ; AVX: haddps5:
     92 ; AVX: vhaddps
     93 define <4 x float> @haddps5(<4 x float> %x) {
     94   %lhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>  ; [x0, x3, undef, undef]
     95   %rhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>  ; [x1, x2, undef, undef]
     96   %sum = fadd <4 x float> %lhs, %rhs  ; [x0+x1, x3+x2, undef, undef]
     97   ret <4 x float> %sum
     98 }
     99 
    100 ; SSE3: haddps6:
    101 ; SSE3-NOT: vhaddps
    102 ; SSE3: haddps
    103 ; AVX: haddps6:
    104 ; AVX: vhaddps
    105 define <4 x float> @haddps6(<4 x float> %x) {
    106   %even = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>  ; [x0, undef, undef, undef]
    107   %odd = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>  ; [x1, undef, undef, undef]
    108   %sum = fadd <4 x float> %even, %odd  ; only lane 0 (x0+x1) is defined
    109   ret <4 x float> %sum
    110 }
    111 
    112 ; SSE3: haddps7:
    113 ; SSE3-NOT: vhaddps
    114 ; SSE3: haddps
    115 ; AVX: haddps7:
    116 ; AVX: vhaddps
    117 define <4 x float> @haddps7(<4 x float> %x) {
    118   %lhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>  ; [undef, x3, undef, undef]
    119   %rhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>  ; [undef, x2, undef, undef]
    120   %sum = fadd <4 x float> %lhs, %rhs  ; only lane 1 (x3+x2) is defined
    121   ret <4 x float> %sum
    122 }
    123 
    124 ; SSE3: hsubpd1:
    125 ; SSE3-NOT: vhsubpd
    126 ; SSE3: hsubpd
    127 ; AVX: hsubpd1:
    128 ; AVX: vhsubpd
    129 define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) {
    130   %even = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>  ; [x0, y0]
    131   %odd = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>  ; [x1, y1]
    132   %diff = fsub <2 x double> %even, %odd  ; [x0-x1, y0-y1]
    133   ret <2 x double> %diff
    134 }
    135 
    136 ; SSE3: hsubpd2:
    137 ; SSE3-NOT: vhsubpd
    138 ; SSE3: hsubpd
    139 ; AVX: hsubpd2:
    140 ; AVX: vhsubpd
    141 define <2 x double> @hsubpd2(<2 x double> %x) {
    142   %even = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>  ; [x0, undef]
    143   %odd = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>  ; [x1, undef]
    144   %diff = fsub <2 x double> %even, %odd  ; only lane 0 (x0-x1) is defined
    145   ret <2 x double> %diff
    146 }
    147 
    148 ; SSE3: hsubps1:
    149 ; SSE3-NOT: vhsubps
    150 ; SSE3: hsubps
    151 ; AVX: hsubps1:
    152 ; AVX: vhsubps
    153 define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) {
    154   %even = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>  ; [x0, x2, y0, y2]
    155   %odd = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>  ; [x1, x3, y1, y3]
    156   %diff = fsub <4 x float> %even, %odd  ; [x0-x1, x2-x3, y0-y1, y2-y3]
    157   ret <4 x float> %diff
    158 }
    159 
    160 ; SSE3: hsubps2:
    161 ; SSE3-NOT: vhsubps
    162 ; SSE3: hsubps
    163 ; AVX: hsubps2:
    164 ; AVX: vhsubps
    165 define <4 x float> @hsubps2(<4 x float> %x) {
    166   %even = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>  ; [undef, x2, undef, undef] (indices 4 and 6 select from the undef operand)
    167   %odd = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>  ; [undef, x3, undef, undef] (indices 5 and 7 select from the undef operand)
    168   %diff = fsub <4 x float> %even, %odd  ; only lane 1 (x2-x3) is defined
    169   ret <4 x float> %diff
    170 }
    171 
    172 ; SSE3: hsubps3:
    173 ; SSE3-NOT: vhsubps
    174 ; SSE3: hsubps
    175 ; AVX: hsubps3:
    176 ; AVX: vhsubps
    177 define <4 x float> @hsubps3(<4 x float> %x) {
    178   %even = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>  ; [x0, x2, undef, undef]
    179   %odd = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>  ; [x1, x3, undef, undef]
    180   %diff = fsub <4 x float> %even, %odd  ; [x0-x1, x2-x3, undef, undef]
    181   ret <4 x float> %diff
    182 }
    183 
    184 ; SSE3: hsubps4:
    185 ; SSE3-NOT: vhsubps
    186 ; SSE3: hsubps
    187 ; AVX: hsubps4:
    188 ; AVX: vhsubps
    189 define <4 x float> @hsubps4(<4 x float> %x) {
    190   %even = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>  ; [x0, undef, undef, undef]
    191   %odd = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>  ; [x1, undef, undef, undef]
    192   %diff = fsub <4 x float> %even, %odd  ; only lane 0 (x0-x1) is defined
    193   ret <4 x float> %diff
    194 }
    195 
    196 ; SSE3: vhaddps1:
    197 ; SSE3-NOT: vhaddps
    198 ; SSE3: haddps
    199 ; SSE3: haddps
    200 ; AVX: vhaddps1:
    201 ; AVX: vhaddps
    202 define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
    203   %even = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>  ; [x0, x2, y0, y2, x4, x6, y4, y6]
    204   %odd = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>  ; [x1, x3, y1, y3, x5, x7, y5, y7]
    205   %sum = fadd <8 x float> %even, %odd  ; adjacent pairs summed within each 4-element half
    206   ret <8 x float> %sum
    207 }
    208 
    209 ; SSE3: vhaddps2:
    210 ; SSE3-NOT: vhaddps
    211 ; SSE3: haddps
    212 ; SSE3: haddps
    213 ; AVX: vhaddps2:
    214 ; AVX: vhaddps
    215 define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
    216   %lhs = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>  ; [x1, x2, y1, y2, x5, x6, y5, y6]
    217   %rhs = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>  ; [x0, x3, y0, y3, x4, x7, y4, y7] (operands are swapped here)
    218   %sum = fadd <8 x float> %lhs, %rhs  ; [x1+x0, x2+x3, y1+y0, y2+y3, x5+x4, x6+x7, y5+y4, y6+y7]
    219   ret <8 x float> %sum
    220 }
    221 
    222 ; SSE3: vhaddps3:
    223 ; SSE3-NOT: vhaddps
    224 ; SSE3: haddps
    225 ; SSE3: haddps
    226 ; AVX: vhaddps3:
    227 ; AVX: vhaddps
    228 define <8 x float> @vhaddps3(<8 x float> %x) {
    229   %lhs = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>  ; [undef, x2, undef, undef, x4, x6, undef, undef] (indices >= 8 select from the undef operand)
    230   %rhs = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>  ; [x1, x3, undef, undef, x5, x7, undef, undef] (indices >= 8 select from the undef operand)
    231   %sum = fadd <8 x float> %lhs, %rhs  ; defined lanes: 1 (x2+x3), 4 (x4+x5), 5 (x6+x7)
    232   ret <8 x float> %sum
    233 }
    234 
    235 ; SSE3: vhsubps1:
    236 ; SSE3-NOT: vhsubps
    237 ; SSE3: hsubps
    238 ; SSE3: hsubps
    239 ; AVX: vhsubps1:
    240 ; AVX: vhsubps
    241 define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
    242   %even = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>  ; [x0, x2, y0, y2, x4, x6, y4, y6]
    243   %odd = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>  ; [x1, x3, y1, y3, x5, x7, y5, y7]
    244   %diff = fsub <8 x float> %even, %odd  ; each even element minus its odd neighbour, within each 4-element half
    245   ret <8 x float> %diff
    246 }
    247 
    248 ; SSE3: vhsubps3:
    249 ; SSE3-NOT: vhsubps
    250 ; SSE3: hsubps
    251 ; SSE3: hsubps
    252 ; AVX: vhsubps3:
    253 ; AVX: vhsubps
    254 define <8 x float> @vhsubps3(<8 x float> %x) {
    255   %lhs = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>  ; [undef, x2, undef, undef, x4, x6, undef, undef] (indices >= 8 select from the undef operand)
    256   %rhs = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>  ; [x1, x3, undef, undef, x5, x7, undef, undef] (indices >= 8 select from the undef operand)
    257   %diff = fsub <8 x float> %lhs, %rhs  ; defined lanes: 1 (x2-x3), 4 (x4-x5), 5 (x6-x7)
    258   ret <8 x float> %diff
    259 }
    260 
    261 ; SSE3: vhaddpd1:
    262 ; SSE3-NOT: vhaddpd
    263 ; SSE3: haddpd
    264 ; SSE3: haddpd
    265 ; AVX: vhaddpd1:
    266 ; AVX: vhaddpd
    267 define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
    268   %even = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>  ; [x0, y0, x2, y2]
    269   %odd = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>  ; [x1, y1, x3, y3]
    270   %sum = fadd <4 x double> %even, %odd  ; [x0+x1, y0+y1, x2+x3, y2+y3]
    271   ret <4 x double> %sum
    272 }
    273 
    274 ; SSE3: vhsubpd1:
    275 ; SSE3-NOT: vhsubpd
    276 ; SSE3: hsubpd
    277 ; SSE3: hsubpd
    278 ; AVX: vhsubpd1:
    279 ; AVX: vhsubpd
    280 define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
    281   %even = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>  ; [x0, y0, x2, y2]
    282   %odd = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>  ; [x1, y1, x3, y3]
    283   %diff = fsub <4 x double> %even, %odd  ; [x0-x1, y0-y1, x2-x3, y2-y3]
    284   ret <4 x double> %diff
    285 }
    286