Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
      2 
      3 ; CHECK: vaddpd
        ; addpd256: element-wise fadd of the two <4 x double> arguments.
        ; The directive above expects a vaddpd instruction in the llc output.
      4 define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
      5 entry:
      6   %add.i = fadd <4 x double> %x, %y
      7   ret <4 x double> %add.i
      8 }
      9 
     10 ; CHECK: vaddpd LCP{{.*}}(%rip)
        ; addpd256fold: fadd of %y with a constant <4 x double> vector.
        ; The directive above expects the vaddpd itself to carry an
        ; LCP...(%rip) memory operand, i.e. the constant is referenced directly
        ; by the add (the LCP label is presumably the constant-pool entry).
     11 define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
     12 entry:
     13   %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
     14   ret <4 x double> %add.i
     15 }
     16 
     17 ; CHECK: vaddps
        ; addps256: element-wise fadd of the two <8 x float> arguments.
        ; The directive above expects a vaddps instruction in the llc output.
     18 define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
     19 entry:
     20   %add.i = fadd <8 x float> %x, %y
     21   ret <8 x float> %add.i
     22 }
     23 
     24 ; CHECK: vaddps LCP{{.*}}(%rip)
        ; addps256fold: fadd of %y with a constant <8 x float> vector (the same
        ; four values repeated twice; the hex literals are approximately 3.4,
        ; 2.3 and 1.2 at float precision).
        ; The directive above expects the vaddps itself to carry an
        ; LCP...(%rip) memory operand (presumably the constant-pool entry).
     25 define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
     26 entry:
     27   %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
     28   ret <8 x float> %add.i
     29 }
     30 
     31 ; CHECK: vsubpd
        ; subpd256: element-wise fsub (%x - %y) on <4 x double> arguments.
        ; The directive above expects a vsubpd instruction in the llc output.
     32 define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
     33 entry:
     34   %sub.i = fsub <4 x double> %x, %y
     35   ret <4 x double> %sub.i
     36 }
     37 
     38 ; CHECK: vsubpd (%
        ; subpd256fold: loads a <4 x double> from %x (align 32) and subtracts
        ; it from %y. The directive above expects vsubpd to take a
        ; register-indirect memory operand, i.e. the load is folded into the
        ; subtract instead of being emitted as a separate move.
     39 define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
     40 entry:
     41   %tmp2 = load <4 x double>, <4 x double>* %x, align 32
     42   %sub.i = fsub <4 x double> %y, %tmp2
     43   ret <4 x double> %sub.i
     44 }
     45 
     46 ; CHECK: vsubps
        ; subps256: element-wise fsub (%x - %y) on <8 x float> arguments.
        ; The directive above expects a vsubps instruction in the llc output.
     47 define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
     48 entry:
     49   %sub.i = fsub <8 x float> %x, %y
     50   ret <8 x float> %sub.i
     51 }
     52 
     53 ; CHECK: vsubps (%
        ; subps256fold: loads an <8 x float> from %x (align 32) and subtracts
        ; it from %y. The directive above expects vsubps to take a
        ; register-indirect memory operand, i.e. the load is folded into the
        ; subtract instead of being emitted as a separate move.
     54 define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
     55 entry:
     56   %tmp2 = load <8 x float>, <8 x float>* %x, align 32
     57   %sub.i = fsub <8 x float> %y, %tmp2
     58   ret <8 x float> %sub.i
     59 }
     60 
     61 ; CHECK: vmulpd
        ; mulpd256: element-wise fmul of the two <4 x double> arguments.
        ; The directive above expects a vmulpd instruction in the llc output.
     62 define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
     63 entry:
     64   %mul.i = fmul <4 x double> %x, %y
     65   ret <4 x double> %mul.i
     66 }
     67 
     68 ; CHECK: vmulpd LCP{{.*}}(%rip)
        ; mulpd256fold: fmul of %y with a constant <4 x double> vector.
        ; The directive above expects the vmulpd itself to carry an
        ; LCP...(%rip) memory operand (presumably the constant-pool entry).
     69 define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
     70 entry:
     71   %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
     72   ret <4 x double> %mul.i
     73 }
     74 
     75 ; CHECK: vmulps
        ; mulps256: element-wise fmul of the two <8 x float> arguments.
        ; The directive above expects a vmulps instruction in the llc output.
     76 define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
     77 entry:
     78   %mul.i = fmul <8 x float> %x, %y
     79   ret <8 x float> %mul.i
     80 }
     81 
     82 ; CHECK: vmulps LCP{{.*}}(%rip)
        ; mulps256fold: fmul of %y with a constant <8 x float> vector (the same
        ; four values repeated twice; the hex literals are approximately 3.4,
        ; 2.3 and 1.2 at float precision).
        ; The directive above expects the vmulps itself to carry an
        ; LCP...(%rip) memory operand (presumably the constant-pool entry).
     83 define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
     84 entry:
     85   %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
     86   ret <8 x float> %mul.i
     87 }
     88 
     89 ; CHECK: vdivpd
        ; divpd256: element-wise fdiv (%x / %y) on <4 x double> arguments.
        ; The directive above expects a vdivpd instruction in the llc output.
     90 define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
     91 entry:
     92   %div.i = fdiv <4 x double> %x, %y
     93   ret <4 x double> %div.i
     94 }
     95 
     96 ; CHECK: vdivpd LCP{{.*}}(%rip)
        ; divpd256fold: fdiv of %y by a constant <4 x double> vector.
        ; The directive above expects the vdivpd itself to carry an
        ; LCP...(%rip) memory operand (presumably the constant-pool entry).
     97 define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
     98 entry:
     99   %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
    100   ret <4 x double> %div.i
    101 }
    102 
    103 ; CHECK: vdivps
        ; divps256: element-wise fdiv (%x / %y) on <8 x float> arguments.
        ; The directive above expects a vdivps instruction in the llc output.
    104 define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
    105 entry:
    106   %div.i = fdiv <8 x float> %x, %y
    107   ret <8 x float> %div.i
    108 }
    109 
    110 ; CHECK: vdivps LCP{{.*}}(%rip)
        ; divps256fold: fdiv of %y by a constant <8 x float> vector (the same
        ; four values repeated twice; the hex literals are approximately 3.4,
        ; 2.3 and 1.2 at float precision).
        ; The directive above expects the vdivps itself to carry an
        ; LCP...(%rip) memory operand (presumably the constant-pool entry).
    111 define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
    112 entry:
    113   %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
    114   ret <8 x float> %div.i
    115 }
    116 
    117 ; CHECK: vsqrtss
        ; sqrtA: tail-calls the external @sqrtf (declared readnone further down
        ; in this file) on a scalar float. The directive above expects the call
        ; to show up as a vsqrtss instruction in the llc output.
    118 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
    119 entry:
    120   %conv1 = tail call float @sqrtf(float %a) nounwind readnone
    121   ret float %conv1
    122 }
    123 
    124 declare double @sqrt(double) readnone
    125 
    126 ; CHECK: vsqrtsd
        ; sqrtB: tail-calls the external @sqrt (declared readnone just above)
        ; on a scalar double. The directive above expects the call to show up
        ; as a vsqrtsd instruction in the llc output.
    127 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
    128 entry:
    129   %call = tail call double @sqrt(double %a) nounwind readnone
    130   ret double %call
    131 }
    132 
    133 declare float @sqrtf(float) readnone
    134 
    135 
    136 ; CHECK: vextractf128 $1
    137 ; CHECK-NEXT: vextractf128 $1
    138 ; CHECK-NEXT: vpaddq %xmm
    139 ; CHECK-NEXT: vpaddq %xmm
    140 ; CHECK-NEXT: vinsertf128 $1
        ; vpaddq: integer add of two <4 x i64> vectors.
        ; The directives above expect five consecutive instructions: two
        ; vextractf128 $1 (one per input, pulling out the upper 128 bits), two
        ; vpaddq on %xmm registers, then vinsertf128 $1 to rebuild the 256-bit
        ; result -- i.e. the add is done as two 128-bit halves.
    141 define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
    142   %x = add <4 x i64> %i, %j
    143   ret <4 x i64> %x
    144 }
    145 
    146 ; CHECK: vextractf128 $1
    147 ; CHECK-NEXT: vextractf128 $1
    148 ; CHECK-NEXT: vpaddd %xmm
    149 ; CHECK-NEXT: vpaddd %xmm
    150 ; CHECK-NEXT: vinsertf128 $1
        ; vpaddd: integer add of two <8 x i32> vectors.
        ; The directives above expect five consecutive instructions: two
        ; vextractf128 $1, two vpaddd on %xmm registers, then vinsertf128 $1
        ; -- i.e. the add is done as two 128-bit halves.
    151 define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
    152   %x = add <8 x i32> %i, %j
    153   ret <8 x i32> %x
    154 }
    155 
    156 ; CHECK: vextractf128 $1
    157 ; CHECK-NEXT: vextractf128 $1
    158 ; CHECK-NEXT: vpaddw %xmm
    159 ; CHECK-NEXT: vpaddw %xmm
    160 ; CHECK-NEXT: vinsertf128 $1
        ; vpaddw: integer add of two <16 x i16> vectors.
        ; The directives above expect five consecutive instructions: two
        ; vextractf128 $1, two vpaddw on %xmm registers, then vinsertf128 $1
        ; -- i.e. the add is done as two 128-bit halves.
    161 define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
    162   %x = add <16 x i16> %i, %j
    163   ret <16 x i16> %x
    164 }
    165 
    166 ; CHECK: vextractf128 $1
    167 ; CHECK-NEXT: vextractf128 $1
    168 ; CHECK-NEXT: vpaddb %xmm
    169 ; CHECK-NEXT: vpaddb %xmm
    170 ; CHECK-NEXT: vinsertf128 $1
        ; vpaddb: integer add of two <32 x i8> vectors.
        ; The directives above expect five consecutive instructions: two
        ; vextractf128 $1, two vpaddb on %xmm registers, then vinsertf128 $1
        ; -- i.e. the add is done as two 128-bit halves.
    171 define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
    172   %x = add <32 x i8> %i, %j
    173   ret <32 x i8> %x
    174 }
    175 
    176 ; CHECK: vextractf128 $1
    177 ; CHECK-NEXT: vextractf128 $1
    178 ; CHECK-NEXT: vpsubq %xmm
    179 ; CHECK-NEXT: vpsubq %xmm
    180 ; CHECK-NEXT: vinsertf128 $1
        ; vpsubq: integer subtract (%i - %j) of two <4 x i64> vectors.
        ; The directives above expect five consecutive instructions: two
        ; vextractf128 $1, two vpsubq on %xmm registers, then vinsertf128 $1
        ; -- i.e. the subtract is done as two 128-bit halves.
    181 define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
    182   %x = sub <4 x i64> %i, %j
    183   ret <4 x i64> %x
    184 }
    185 
    186 ; CHECK: vextractf128 $1
    187 ; CHECK-NEXT: vextractf128 $1
    188 ; CHECK-NEXT: vpsubd %xmm
    189 ; CHECK-NEXT: vpsubd %xmm
    190 ; CHECK-NEXT: vinsertf128 $1
        ; vpsubd: integer subtract (%i - %j) of two <8 x i32> vectors.
        ; The directives above expect five consecutive instructions: two
        ; vextractf128 $1, two vpsubd on %xmm registers, then vinsertf128 $1
        ; -- i.e. the subtract is done as two 128-bit halves.
    191 define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
    192   %x = sub <8 x i32> %i, %j
    193   ret <8 x i32> %x
    194 }
    195 
    196 ; CHECK: vextractf128 $1
    197 ; CHECK-NEXT: vextractf128 $1
    198 ; CHECK-NEXT: vpsubw %xmm
    199 ; CHECK-NEXT: vpsubw %xmm
    200 ; CHECK-NEXT: vinsertf128 $1
        ; vpsubw: integer subtract (%i - %j) of two <16 x i16> vectors.
        ; The directives above expect five consecutive instructions: two
        ; vextractf128 $1, two vpsubw on %xmm registers, then vinsertf128 $1
        ; -- i.e. the subtract is done as two 128-bit halves.
    201 define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
    202   %x = sub <16 x i16> %i, %j
    203   ret <16 x i16> %x
    204 }
    205 
    206 ; CHECK: vextractf128 $1
    207 ; CHECK-NEXT: vextractf128 $1
    208 ; CHECK-NEXT: vpsubb %xmm
    209 ; CHECK-NEXT: vpsubb %xmm
    210 ; CHECK-NEXT: vinsertf128 $1
        ; vpsubb: integer subtract (%i - %j) of two <32 x i8> vectors.
        ; The directives above expect five consecutive instructions: two
        ; vextractf128 $1, two vpsubb on %xmm registers, then vinsertf128 $1
        ; -- i.e. the subtract is done as two 128-bit halves.
    211 define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
    212   %x = sub <32 x i8> %i, %j
    213   ret <32 x i8> %x
    214 }
    215 
    216 ; CHECK: vextractf128 $1
    217 ; CHECK-NEXT: vextractf128 $1
    218 ; CHECK-NEXT: vpmulld %xmm
    219 ; CHECK-NEXT: vpmulld %xmm
    220 ; CHECK-NEXT: vinsertf128 $1
        ; vpmulld: integer multiply of two <8 x i32> vectors.
        ; The directives above expect five consecutive instructions: two
        ; vextractf128 $1, two vpmulld on %xmm registers, then vinsertf128 $1
        ; -- i.e. the multiply is done as two 128-bit halves.
    221 define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
    222   %x = mul <8 x i32> %i, %j
    223   ret <8 x i32> %x
    224 }
    225 
    226 ; CHECK: vextractf128 $1
    227 ; CHECK-NEXT: vextractf128 $1
    228 ; CHECK-NEXT: vpmullw %xmm
    229 ; CHECK-NEXT: vpmullw %xmm
    230 ; CHECK-NEXT: vinsertf128 $1
        ; vpmullw: integer multiply of two <16 x i16> vectors.
        ; The directives above expect five consecutive instructions: two
        ; vextractf128 $1, two vpmullw on %xmm registers, then vinsertf128 $1
        ; -- i.e. the multiply is done as two 128-bit halves.
    231 define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
    232   %x = mul <16 x i16> %i, %j
    233   ret <16 x i16> %x
    234 }
    235 
    236 ; CHECK: vextractf128 $1
    237 ; CHECK-NEXT: vextractf128 $1
    238 ; CHECK-NEXT: vpmuludq %xmm
    239 ; CHECK-NEXT: vpsrlq $32, %xmm
    240 ; CHECK-NEXT: vpmuludq %xmm
    241 ; CHECK-NEXT: vpsllq $32, %xmm
    242 ; CHECK-NEXT: vpaddq %xmm
    243 ; CHECK-NEXT: vpsrlq $32, %xmm
    244 ; CHECK-NEXT: vpmuludq %xmm
    245 ; CHECK-NEXT: vpsllq $32, %xmm
    246 ; CHECK-NEXT: vpaddq %xmm
    247 ; CHECK-NEXT: vpmuludq %xmm
    248 ; CHECK-NEXT: vpsrlq $32, %xmm
    249 ; CHECK-NEXT: vpmuludq %xmm
    250 ; CHECK-NEXT: vpsllq $32, %xmm
    251 ; CHECK-NEXT: vpaddq %xmm
    252 ; CHECK-NEXT: vpsrlq $32, %xmm
    253 ; CHECK-NEXT: vpmuludq %xmm
    254 ; CHECK-NEXT: vpsllq $32, %xmm
    255 ; CHECK-NEXT: vpaddq %xmm
    256 ; CHECK-NEXT: vinsertf128 $1
        ; mul-v4i64: integer multiply of two <4 x i64> vectors.
        ; The directives above pin the exact consecutive instruction sequence:
        ; two vextractf128 $1, then two identical nine-instruction groups of
        ; vpmuludq / vpsrlq $32 / vpsllq $32 / vpaddq on %xmm registers, then
        ; vinsertf128 $1. NOTE(review): each group presumably computes one
        ; 128-bit half of the 64-bit product from 32x32-bit partial products;
        ; the directives only fix mnemonics and order, not which half is which.
        ; Because every line is matched as "next", any scheduling change in
        ; llc will break this test.
    257 define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
    258   %x = mul <4 x i64> %i, %j
    259   ret <4 x i64> %x
    260 }
    261 
    262 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
    263 
    264 define <4 x float> @int_sqrt_ss() {
        ; int_sqrt_ss: calls the llvm.x86.sse.sqrt.ss intrinsic on a vector
        ; whose lane 0 comes from a scalar load. The two directives below
        ; expect the function name and then a vsqrtss in the llc output.
    265 ; CHECK: int_sqrt_ss
    266 ; CHECK: vsqrtss
        ; The load address is undef (in addrspace(1)); only the shape of the
        ; load -> insertelement -> intrinsic chain matters for this test.
    267  %x0 = load float, float addrspace(1)* undef, align 8
        ; Place the loaded scalar in element 0; the other lanes stay undef.
    268  %x1 = insertelement <4 x float> undef, float %x0, i32 0
    269  %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
    270  ret <4 x float> %x2
    271 }
    272