Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
      3 
      4 define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
        ; fadd of the two <4 x double> arguments (IR operand order is %x, %y).
        ; Expected output: a single register-only ymm vaddpd followed by retq.
      5 ; CHECK-LABEL: addpd256:
      6 ; CHECK:       ## %bb.0: ## %entry
      7 ; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
      8 ; CHECK-NEXT:    retq
      9 entry:
     10   %add.i = fadd <4 x double> %x, %y
     11   ret <4 x double> %add.i
     12 }
     13 
     14 define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
        ; fadd of %y with a constant <4 x double> vector.
        ; Expected output: one vaddpd whose source operand is %rip-relative memory;
        ; the symbol part of that address is matched by a wildcard.
     15 ; CHECK-LABEL: addpd256fold:
     16 ; CHECK:       ## %bb.0: ## %entry
     17 ; CHECK-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
     18 ; CHECK-NEXT:    retq
     19 entry:
     20   %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
     21   ret <4 x double> %add.i
     22 }
     23 
     24 define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
        ; fadd of the two <8 x float> arguments (IR operand order is %x, %y).
        ; Expected output: a single register-only ymm vaddps followed by retq.
     25 ; CHECK-LABEL: addps256:
     26 ; CHECK:       ## %bb.0: ## %entry
     27 ; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
     28 ; CHECK-NEXT:    retq
     29 entry:
     30   %add.i = fadd <8 x float> %x, %y
     31   ret <8 x float> %add.i
     32 }
     33 
     34 define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
        ; fadd of %y with a constant <8 x float> vector (the same four values
        ; written twice). Expected output: one vaddps whose source operand is
        ; %rip-relative memory; the symbol part is matched by a wildcard.
     35 ; CHECK-LABEL: addps256fold:
     36 ; CHECK:       ## %bb.0: ## %entry
     37 ; CHECK-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
     38 ; CHECK-NEXT:    retq
     39 entry:
     40   %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
     41   ret <8 x float> %add.i
     42 }
     43 
     44 define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
        ; fsub computing %x - %y on <4 x double> values.
        ; Expected output: a single register-only ymm vsubpd followed by retq.
     45 ; CHECK-LABEL: subpd256:
     46 ; CHECK:       ## %bb.0: ## %entry
     47 ; CHECK-NEXT:    vsubpd %ymm0, %ymm1, %ymm0
     48 ; CHECK-NEXT:    retq
     49 entry:
     50   %sub.i = fsub <4 x double> %x, %y
     51   ret <4 x double> %sub.i
     52 }
     53 
     54 define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
        ; Loads a <4 x double> from %x (align 32) and subtracts it from %y.
        ; Expected output: one vsubpd that reads (%rdi) directly as its memory
        ; source, with no separate load instruction before it.
     55 ; CHECK-LABEL: subpd256fold:
     56 ; CHECK:       ## %bb.0: ## %entry
     57 ; CHECK-NEXT:    vsubpd (%rdi), %ymm0, %ymm0
     58 ; CHECK-NEXT:    retq
     59 entry:
     60   %tmp2 = load <4 x double>, <4 x double>* %x, align 32
     61   %sub.i = fsub <4 x double> %y, %tmp2
     62   ret <4 x double> %sub.i
     63 }
     64 
     65 define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
        ; fsub computing %x - %y on <8 x float> values.
        ; Expected output: a single register-only ymm vsubps followed by retq.
     66 ; CHECK-LABEL: subps256:
     67 ; CHECK:       ## %bb.0: ## %entry
     68 ; CHECK-NEXT:    vsubps %ymm0, %ymm1, %ymm0
     69 ; CHECK-NEXT:    retq
     70 entry:
     71   %sub.i = fsub <8 x float> %x, %y
     72   ret <8 x float> %sub.i
     73 }
     74 
     75 define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
        ; Loads an <8 x float> from %x (align 32) and subtracts it from %y.
        ; Expected output: one vsubps that reads (%rdi) directly as its memory
        ; source, with no separate load instruction before it.
     76 ; CHECK-LABEL: subps256fold:
     77 ; CHECK:       ## %bb.0: ## %entry
     78 ; CHECK-NEXT:    vsubps (%rdi), %ymm0, %ymm0
     79 ; CHECK-NEXT:    retq
     80 entry:
     81   %tmp2 = load <8 x float>, <8 x float>* %x, align 32
     82   %sub.i = fsub <8 x float> %y, %tmp2
     83   ret <8 x float> %sub.i
     84 }
     85 
     86 define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
        ; fmul of the two <4 x double> arguments (IR operand order is %x, %y).
        ; Expected output: a single register-only ymm vmulpd followed by retq.
     87 ; CHECK-LABEL: mulpd256:
     88 ; CHECK:       ## %bb.0: ## %entry
     89 ; CHECK-NEXT:    vmulpd %ymm0, %ymm1, %ymm0
     90 ; CHECK-NEXT:    retq
     91 entry:
     92   %mul.i = fmul <4 x double> %x, %y
     93   ret <4 x double> %mul.i
     94 }
     95 
     96 define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
        ; fmul of %y with a constant <4 x double> vector.
        ; Expected output: one vmulpd whose source operand is %rip-relative memory;
        ; the symbol part of that address is matched by a wildcard.
     97 ; CHECK-LABEL: mulpd256fold:
     98 ; CHECK:       ## %bb.0: ## %entry
     99 ; CHECK-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
    100 ; CHECK-NEXT:    retq
    101 entry:
    102   %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
    103   ret <4 x double> %mul.i
    104 }
    105 
    106 define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
        ; fmul of the two <8 x float> arguments (IR operand order is %x, %y).
        ; Expected output: a single register-only ymm vmulps followed by retq.
    107 ; CHECK-LABEL: mulps256:
    108 ; CHECK:       ## %bb.0: ## %entry
    109 ; CHECK-NEXT:    vmulps %ymm0, %ymm1, %ymm0
    110 ; CHECK-NEXT:    retq
    111 entry:
    112   %mul.i = fmul <8 x float> %x, %y
    113   ret <8 x float> %mul.i
    114 }
    115 
    116 define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
        ; fmul of %y with a constant <8 x float> vector (the same four values
        ; written twice). Expected output: one vmulps whose source operand is
        ; %rip-relative memory; the symbol part is matched by a wildcard.
    117 ; CHECK-LABEL: mulps256fold:
    118 ; CHECK:       ## %bb.0: ## %entry
    119 ; CHECK-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
    120 ; CHECK-NEXT:    retq
    121 entry:
    122   %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
    123   ret <8 x float> %mul.i
    124 }
    125 
    126 define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
        ; fdiv computing %x / %y on <4 x double> values.
        ; Expected output: a single register-only ymm vdivpd followed by retq.
    127 ; CHECK-LABEL: divpd256:
    128 ; CHECK:       ## %bb.0: ## %entry
    129 ; CHECK-NEXT:    vdivpd %ymm0, %ymm1, %ymm0
    130 ; CHECK-NEXT:    retq
    131 entry:
    132   %div.i = fdiv <4 x double> %x, %y
    133   ret <4 x double> %div.i
    134 }
    135 
    136 define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
        ; fdiv of %y by a constant <4 x double> vector.
        ; Expected output: one vdivpd whose source operand is %rip-relative memory;
        ; the symbol part of that address is matched by a wildcard.
    137 ; CHECK-LABEL: divpd256fold:
    138 ; CHECK:       ## %bb.0: ## %entry
    139 ; CHECK-NEXT:    vdivpd {{.*}}(%rip), %ymm0, %ymm0
    140 ; CHECK-NEXT:    retq
    141 entry:
    142   %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
    143   ret <4 x double> %div.i
    144 }
    145 
    146 define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
        ; fdiv computing %x / %y on <8 x float> values.
        ; Expected output: a single register-only ymm vdivps followed by retq.
    147 ; CHECK-LABEL: divps256:
    148 ; CHECK:       ## %bb.0: ## %entry
    149 ; CHECK-NEXT:    vdivps %ymm0, %ymm1, %ymm0
    150 ; CHECK-NEXT:    retq
    151 entry:
    152   %div.i = fdiv <8 x float> %x, %y
    153   ret <8 x float> %div.i
    154 }
    155 
    156 define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
        ; fdiv of %y by a constant <8 x float> vector (the same four values
        ; written twice). Expected output: one vdivps whose source operand is
        ; %rip-relative memory; the symbol part is matched by a wildcard.
    157 ; CHECK-LABEL: divps256fold:
    158 ; CHECK:       ## %bb.0: ## %entry
    159 ; CHECK-NEXT:    vdivps {{.*}}(%rip), %ymm0, %ymm0
    160 ; CHECK-NEXT:    retq
    161 entry:
    162   %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
    163   ret <8 x float> %div.i
    164 }
    165 
    166 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
        ; Tail-calls the externally declared @sqrtf on a float argument.
        ; Expected output: an inline vsqrtss followed directly by retq, i.e. no
        ; call instruction is emitted.
    167 ; CHECK-LABEL: sqrtA:
    168 ; CHECK:       ## %bb.0: ## %entry
    169 ; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
    170 ; CHECK-NEXT:    retq
    171 entry:
    172   %conv1 = tail call float @sqrtf(float %a) nounwind readnone
    173   ret float %conv1
    174 }
    175 
    176 declare double @sqrt(double) readnone
    177 
    178 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
        ; Tail-calls the externally declared @sqrt on a double argument.
        ; Expected output: an inline vsqrtsd followed directly by retq, i.e. no
        ; call instruction is emitted.
    179 ; CHECK-LABEL: sqrtB:
    180 ; CHECK:       ## %bb.0: ## %entry
    181 ; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
    182 ; CHECK-NEXT:    retq
    183 entry:
    184   %call = tail call double @sqrt(double %a) nounwind readnone
    185   ret double %call
    186 }
    187 
    188 declare float @sqrtf(float) readnone
    189 
    190 
    191 define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
        ; Integer add of two <4 x i64> values.
        ; Expected output: the upper 128 bits of each ymm operand are extracted
        ; (vextractf128 $1), two xmm vpaddq operations are performed, and the
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): the split is presumably because the llc invocation at the
        ; top of the file enables +avx only - confirm.
    192 ; CHECK-LABEL: vpaddq:
    193 ; CHECK:       ## %bb.0:
    194 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    195 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    196 ; CHECK-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
    197 ; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
    198 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    199 ; CHECK-NEXT:    retq
    200   %x = add <4 x i64> %i, %j
    201   ret <4 x i64> %x
    202 }
    203 
    204 define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
        ; Integer add of two <8 x i32> values.
        ; Expected output: the upper 128 bits of each ymm operand are extracted
        ; (vextractf128 $1), two xmm vpaddd operations are performed, and the
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): the split is presumably because the llc invocation at the
        ; top of the file enables +avx only - confirm.
    205 ; CHECK-LABEL: vpaddd:
    206 ; CHECK:       ## %bb.0:
    207 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    208 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    209 ; CHECK-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
    210 ; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
    211 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    212 ; CHECK-NEXT:    retq
    213   %x = add <8 x i32> %i, %j
    214   ret <8 x i32> %x
    215 }
    216 
    217 define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
        ; Integer add of two <16 x i16> values.
        ; Expected output: the upper 128 bits of each ymm operand are extracted
        ; (vextractf128 $1), two xmm vpaddw operations are performed, and the
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): the split is presumably because the llc invocation at the
        ; top of the file enables +avx only - confirm.
    218 ; CHECK-LABEL: vpaddw:
    219 ; CHECK:       ## %bb.0:
    220 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    221 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    222 ; CHECK-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
    223 ; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
    224 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    225 ; CHECK-NEXT:    retq
    226   %x = add <16 x i16> %i, %j
    227   ret <16 x i16> %x
    228 }
    229 
    230 define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
        ; Integer add of two <32 x i8> values.
        ; Expected output: the upper 128 bits of each ymm operand are extracted
        ; (vextractf128 $1), two xmm vpaddb operations are performed, and the
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): the split is presumably because the llc invocation at the
        ; top of the file enables +avx only - confirm.
    231 ; CHECK-LABEL: vpaddb:
    232 ; CHECK:       ## %bb.0:
    233 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    234 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    235 ; CHECK-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
    236 ; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
    237 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    238 ; CHECK-NEXT:    retq
    239   %x = add <32 x i8> %i, %j
    240   ret <32 x i8> %x
    241 }
    242 
    243 define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
        ; Integer subtract (%i - %j) of two <4 x i64> values.
        ; Expected output: the upper 128 bits of each ymm operand are extracted
        ; (vextractf128 $1), two xmm vpsubq operations are performed, and the
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): the split is presumably because the llc invocation at the
        ; top of the file enables +avx only - confirm.
    244 ; CHECK-LABEL: vpsubq:
    245 ; CHECK:       ## %bb.0:
    246 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    247 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    248 ; CHECK-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
    249 ; CHECK-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
    250 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    251 ; CHECK-NEXT:    retq
    252   %x = sub <4 x i64> %i, %j
    253   ret <4 x i64> %x
    254 }
    255 
    256 define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
        ; Integer subtract (%i - %j) of two <8 x i32> values.
        ; Expected output: the upper 128 bits of each ymm operand are extracted
        ; (vextractf128 $1), two xmm vpsubd operations are performed, and the
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): the split is presumably because the llc invocation at the
        ; top of the file enables +avx only - confirm.
    257 ; CHECK-LABEL: vpsubd:
    258 ; CHECK:       ## %bb.0:
    259 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    260 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    261 ; CHECK-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
    262 ; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
    263 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    264 ; CHECK-NEXT:    retq
    265   %x = sub <8 x i32> %i, %j
    266   ret <8 x i32> %x
    267 }
    268 
    269 define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
        ; Integer subtract (%i - %j) of two <16 x i16> values.
        ; Expected output: the upper 128 bits of each ymm operand are extracted
        ; (vextractf128 $1), two xmm vpsubw operations are performed, and the
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): the split is presumably because the llc invocation at the
        ; top of the file enables +avx only - confirm.
    270 ; CHECK-LABEL: vpsubw:
    271 ; CHECK:       ## %bb.0:
    272 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    273 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    274 ; CHECK-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
    275 ; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
    276 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    277 ; CHECK-NEXT:    retq
    278   %x = sub <16 x i16> %i, %j
    279   ret <16 x i16> %x
    280 }
    281 
    282 define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
        ; Integer subtract (%i - %j) of two <32 x i8> values.
        ; Expected output: the upper 128 bits of each ymm operand are extracted
        ; (vextractf128 $1), two xmm vpsubb operations are performed, and the
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): the split is presumably because the llc invocation at the
        ; top of the file enables +avx only - confirm.
    283 ; CHECK-LABEL: vpsubb:
    284 ; CHECK:       ## %bb.0:
    285 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    286 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    287 ; CHECK-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
    288 ; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
    289 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    290 ; CHECK-NEXT:    retq
    291   %x = sub <32 x i8> %i, %j
    292   ret <32 x i8> %x
    293 }
    294 
    295 define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
        ; Integer multiply of two <8 x i32> values.
        ; Expected output: the upper 128 bits of each ymm operand are extracted
        ; (vextractf128 $1), two xmm vpmulld operations are performed, and the
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): the split is presumably because the llc invocation at the
        ; top of the file enables +avx only - confirm.
    296 ; CHECK-LABEL: vpmulld:
    297 ; CHECK:       ## %bb.0:
    298 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    299 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    300 ; CHECK-NEXT:    vpmulld %xmm2, %xmm3, %xmm2
    301 ; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
    302 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    303 ; CHECK-NEXT:    retq
    304   %x = mul <8 x i32> %i, %j
    305   ret <8 x i32> %x
    306 }
    307 
    308 define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
        ; Integer multiply of two <16 x i16> values.
        ; Expected output: the upper 128 bits of each ymm operand are extracted
        ; (vextractf128 $1), two xmm vpmullw operations are performed, and the
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): the split is presumably because the llc invocation at the
        ; top of the file enables +avx only - confirm.
    309 ; CHECK-LABEL: vpmullw:
    310 ; CHECK:       ## %bb.0:
    311 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    312 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    313 ; CHECK-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
    314 ; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
    315 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    316 ; CHECK-NEXT:    retq
    317   %x = mul <16 x i16> %i, %j
    318   ret <16 x i16> %x
    319 }
    320 
    321 define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
        ; Integer multiply of two <4 x i64> values.
        ; Expected output processes each 128-bit half separately. Per half, three
        ; vpmuludq results are combined using vpsrlq $32, vpaddq and vpsllq $32
        ; (upper half first, into xmm2; then the lower half, into xmm0). The two
        ; halves are rejoined with vinsertf128.
        ; NOTE(review): no single 64-bit element multiply instruction appears in
        ; the expected output; presumably none is available with the +avx-only
        ; llc invocation at the top of the file - confirm.
    322 ; CHECK-LABEL: mul_v4i64:
    323 ; CHECK:       ## %bb.0:
    324 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
    325 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
    326 ; CHECK-NEXT:    vpsrlq $32, %xmm3, %xmm4
    327 ; CHECK-NEXT:    vpmuludq %xmm2, %xmm4, %xmm4
    328 ; CHECK-NEXT:    vpsrlq $32, %xmm2, %xmm5
    329 ; CHECK-NEXT:    vpmuludq %xmm5, %xmm3, %xmm5
    330 ; CHECK-NEXT:    vpaddq %xmm4, %xmm5, %xmm4
    331 ; CHECK-NEXT:    vpsllq $32, %xmm4, %xmm4
    332 ; CHECK-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
    333 ; CHECK-NEXT:    vpaddq %xmm4, %xmm2, %xmm2
    334 ; CHECK-NEXT:    vpsrlq $32, %xmm0, %xmm3
    335 ; CHECK-NEXT:    vpmuludq %xmm1, %xmm3, %xmm3
    336 ; CHECK-NEXT:    vpsrlq $32, %xmm1, %xmm4
    337 ; CHECK-NEXT:    vpmuludq %xmm4, %xmm0, %xmm4
    338 ; CHECK-NEXT:    vpaddq %xmm3, %xmm4, %xmm3
    339 ; CHECK-NEXT:    vpsllq $32, %xmm3, %xmm3
    340 ; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
    341 ; CHECK-NEXT:    vpaddq %xmm3, %xmm0, %xmm0
    342 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    343 ; CHECK-NEXT:    retq
    344   %x = mul <4 x i64> %i, %j
    345   ret <4 x i64> %x
    346 }
    347 
    348 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
    349 
    350 define <4 x float> @int_sqrt_ss() {
        ; Loads a float through an undef addrspace(1) pointer, places it in lane 0
        ; of an otherwise undef <4 x float>, and passes that vector to the
        ; llvm.x86.sse.sqrt.ss intrinsic.
        ; Expected output: a vmovss load into xmm0 followed by a register-form
        ; vsqrtss; the load stays a separate instruction.
    351 ; CHECK-LABEL: int_sqrt_ss:
    352 ; CHECK:       ## %bb.0:
    353 ; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    354 ; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
    355 ; CHECK-NEXT:    retq
    356  %x0 = load float, float addrspace(1)* undef, align 8
    357  %x1 = insertelement <4 x float> undef, float %x0, i32 0
    358  %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
    359  ret <4 x float> %x2
    360 }
    361 
    362 define <2 x double> @vector_sqrt_scalar_load(double* %a0) optsize {
        ; optsize function: loads a double from %a0, inserts it into lane 0 of an
        ; undef <2 x double>, and calls llvm.sqrt.v2f64 on the result.
        ; Expected output: the scalar load stays a separate vmovsd, and vsqrtpd
        ; then operates on registers only (no memory operand on vsqrtpd).
    363 ; CHECK-LABEL: vector_sqrt_scalar_load:
    364 ; CHECK:       ## %bb.0:
    365 ; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    366 ; CHECK-NEXT:    vsqrtpd %xmm0, %xmm0
    367 ; CHECK-NEXT:    retq
    368   %a1 = load double, double* %a0
    369   %a2 = insertelement <2 x double> undef, double %a1, i32 0
    370   %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a2) ; <<2 x double>> [#uses=1]
    371   ret <2 x double> %res
    372 }
    373 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone
    374