Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
      2 
      3 ; CHECK-LABEL: addpd512
      4 ; CHECK: vaddpd
      5 ; CHECK: ret
      6 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {  ; <8 x double> add, both operands passed by value
      7 entry:
      8   %sum = fadd <8 x double> %x, %y  ; element-wise %x + %y
      9   ret <8 x double> %sum
     10 }
     11 
     12 ; CHECK-LABEL: addpd512fold
     13 ; CHECK: vaddpd LCP{{.*}}(%rip)
     14 ; CHECK: ret
     15 define <8 x double> @addpd512fold(<8 x double> %y) {  ; <8 x double> add against a non-splat constant vector
     16 entry:
     17   %sum = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
     18   ret <8 x double> %sum
     19 }
     20 
     21 ; CHECK-LABEL: addps512
     22 ; CHECK: vaddps
     23 ; CHECK: ret
     24 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {  ; <16 x float> add, both operands passed by value
     25 entry:
     26   %sum = fadd <16 x float> %x, %y  ; element-wise %x + %y
     27   ret <16 x float> %sum
     28 }
     29 
     30 ; CHECK-LABEL: addps512fold
     31 ; CHECK: vaddps LCP{{.*}}(%rip)
     32 ; CHECK: ret
     33 define <16 x float> @addps512fold(<16 x float> %y) {  ; <16 x float> add against a non-splat constant vector
     34 entry:
     35   %sum = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000,  float 0x4002666660000000, float 0x3FF3333340000000>
     36   ret <16 x float> %sum
     37 }
     38 
     39 ; CHECK-LABEL: subpd512
     40 ; CHECK: vsubpd
     41 ; CHECK: ret
     42 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {  ; <8 x double> subtract, both operands passed by value
     43 entry:
     44   %diff = fsub <8 x double> %x, %y  ; element-wise %x - %y
     45   ret <8 x double> %diff
     46 }
     47 
     48 ; CHECK-LABEL: @subpd512fold
     49 ; CHECK: vsubpd (%
     50 ; CHECK: ret
     51 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {  ; subtract a vector loaded through %x from %y
     52 entry:
     53   %rhs = load <8 x double>* %x, align 8  ; subtrahend comes from memory
     54   %diff = fsub <8 x double> %y, %rhs
     55   ret <8 x double> %diff
     56 }
     57 
     58 ; CHECK-LABEL: @subps512
     59 ; CHECK: vsubps
     60 ; CHECK: ret
     61 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {  ; <16 x float> subtract, both operands passed by value
     62 entry:
     63   %diff = fsub <16 x float> %x, %y  ; element-wise %x - %y
     64   ret <16 x float> %diff
     65 }
     66 
     67 ; CHECK-LABEL: subps512fold
     68 ; CHECK: vsubps (%
     69 ; CHECK: ret
     70 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {  ; subtract a vector loaded through %x from %y
     71 entry:
     72   %rhs = load <16 x float>* %x, align 4  ; subtrahend comes from memory
     73   %diff = fsub <16 x float> %y, %rhs
     74   ret <16 x float> %diff
     75 }
     76 
     77 ; CHECK-LABEL: imulq512
     78 ; CHECK: vpmuludq
     79 ; CHECK: vpmuludq
     80 ; CHECK: ret
     81 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {  ; <8 x i64> integer multiply
     82   %prod = mul <8 x i64>%x, %y  ; element-wise %x * %y
     83   ret <8 x i64>%prod
     84 }
     85 
     86 ; CHECK-LABEL: mulpd512
     87 ; CHECK: vmulpd
     88 ; CHECK: ret
     89 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {  ; <8 x double> multiply, both operands passed by value
     90 entry:
     91   %prod = fmul <8 x double> %x, %y  ; element-wise %x * %y
     92   ret <8 x double> %prod
     93 }
     94 
     95 ; CHECK-LABEL: mulpd512fold
     96 ; CHECK: vmulpd LCP{{.*}}(%rip)
     97 ; CHECK: ret
     98 define <8 x double> @mulpd512fold(<8 x double> %y) {  ; <8 x double> multiply by a non-splat constant vector
     99 entry:
    100   %prod = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
    101   ret <8 x double> %prod
    102 }
    103 
    104 ; CHECK-LABEL: mulps512
    105 ; CHECK: vmulps
    106 ; CHECK: ret
    107 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {  ; <16 x float> multiply, both operands passed by value
    108 entry:
    109   %prod = fmul <16 x float> %x, %y  ; element-wise %x * %y
    110   ret <16 x float> %prod
    111 }
    112 
    113 ; CHECK-LABEL: mulps512fold
    114 ; CHECK: vmulps LCP{{.*}}(%rip)
    115 ; CHECK: ret
    116 define <16 x float> @mulps512fold(<16 x float> %y) {  ; <16 x float> multiply by a non-splat constant vector
    117 entry:
    118   %prod = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
    119   ret <16 x float> %prod
    120 }
    121 
    122 ; CHECK-LABEL: divpd512
    123 ; CHECK: vdivpd
    124 ; CHECK: ret
    125 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {  ; <8 x double> divide, both operands passed by value
    126 entry:
    127   %quot = fdiv <8 x double> %x, %y  ; element-wise %x / %y
    128   ret <8 x double> %quot
    129 }
    130 
    131 ; CHECK-LABEL: divpd512fold
    132 ; CHECK: vdivpd LCP{{.*}}(%rip)
    133 ; CHECK: ret
    134 define <8 x double> @divpd512fold(<8 x double> %y) {  ; <8 x double> divide by a non-splat constant vector
    135 entry:
    136   %quot = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
    137   ret <8 x double> %quot
    138 }
    139 
    140 ; CHECK-LABEL: divps512
    141 ; CHECK: vdivps
    142 ; CHECK: ret
    143 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {  ; <16 x float> divide, both operands passed by value
    144 entry:
    145   %quot = fdiv <16 x float> %x, %y  ; element-wise %x / %y
    146   ret <16 x float> %quot
    147 }
    148 
    149 ; CHECK-LABEL: divps512fold
    150 ; CHECK: vdivps LCP{{.*}}(%rip)
    151 ; CHECK: ret
    152 define <16 x float> @divps512fold(<16 x float> %y) {  ; <16 x float> divide by a non-splat constant vector
    153 entry:
    154   %quot = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
    155   ret <16 x float> %quot
    156 }
    157 
    158 ; CHECK-LABEL: vpaddq_test
    159 ; CHECK: vpaddq %zmm
    160 ; CHECK: ret
    161 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {  ; <8 x i64> integer add, both operands by value
    162   %sum = add <8 x i64> %i, %j  ; element-wise %i + %j
    163   ret <8 x i64> %sum
    164 }
    165 
    166 ; CHECK-LABEL: vpaddq_fold_test
    167 ; CHECK: vpaddq (%
    168 ; CHECK: ret
    169 define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {  ; add a vector loaded through %j to %i
    170   %rhs = load <8 x i64>* %j, align 4  ; second addend comes from memory
    171   %sum = add <8 x i64> %i, %rhs
    172   ret <8 x i64> %sum
    173 }
    174 
    175 ; CHECK-LABEL: vpaddq_broadcast_test
    176 ; CHECK: vpaddq LCP{{.*}}(%rip){1to8}
    177 ; CHECK: ret
    178 define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {  ; add a constant vector whose 8 lanes are all i64 1
    179   %sum = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
    180   ret <8 x i64> %sum
    181 }
    182 
    183 ; CHECK-LABEL: vpaddq_broadcast2_test
    184 ; CHECK: vpaddq (%rdi){1to8}
    185 ; CHECK: ret
    186 define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {  ; add one loaded i64, replicated to all 8 lanes, to %i
    187   %scalar = load i64* %j  ; the single value that fills every lane below
    188   %splat.0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
    189   %splat.1 = insertelement <8 x i64> %splat.0, i64 %scalar, i32 1
    190   %splat.2 = insertelement <8 x i64> %splat.1, i64 %scalar, i32 2
    191   %splat.3 = insertelement <8 x i64> %splat.2, i64 %scalar, i32 3
    192   %splat.4 = insertelement <8 x i64> %splat.3, i64 %scalar, i32 4
    193   %splat.5 = insertelement <8 x i64> %splat.4, i64 %scalar, i32 5
    194   %splat.6 = insertelement <8 x i64> %splat.5, i64 %scalar, i32 6
    195   %splat.7 = insertelement <8 x i64> %splat.6, i64 %scalar, i32 7
    196   %sum = add <8 x i64> %i, %splat.7  ; every lane of %splat.7 holds %scalar
    197   ret <8 x i64> %sum
    198 }
    199 
    200 ; CHECK-LABEL: vpaddd_test
    201 ; CHECK: vpaddd %zmm
    202 ; CHECK: ret
    203 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {  ; <16 x i32> integer add, both operands by value
    204   %sum = add <16 x i32> %i, %j  ; element-wise %i + %j
    205   ret <16 x i32> %sum
    206 }
    207 
    208 ; CHECK-LABEL: vpaddd_fold_test
    209 ; CHECK: vpaddd (%
    210 ; CHECK: ret
    211 define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {  ; add a vector loaded through %j to %i
    212   %rhs = load <16 x i32>* %j, align 4  ; second addend comes from memory
    213   %sum = add <16 x i32> %i, %rhs
    214   ret <16 x i32> %sum
    215 }
    216 
    217 ; CHECK-LABEL: vpaddd_broadcast_test
    218 ; CHECK: vpaddd LCP{{.*}}(%rip){1to16}
    219 ; CHECK: ret
    220 define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {  ; add a constant vector whose 16 lanes are all i32 1
    221   %sum = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    222   ret <16 x i32> %sum
    223 }
    224 
    225 ; CHECK-LABEL: vpaddd_mask_test
    226 ; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
    227 ; CHECK: ret
    228 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
    229   %keep = icmp ne <16 x i32> %mask1, zeroinitializer  ; true in lanes where %mask1 is non-zero
    230   %sum = add <16 x i32> %i, %j
    231   %res = select <16 x i1> %keep, <16 x i32> %sum, <16 x i32> %i  ; other lanes pass %i through
    232   ret <16 x i32> %res
    233 }
    234 
    235 ; CHECK-LABEL: vpaddd_maskz_test
    236 ; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z} }}
    237 ; CHECK: ret
    238 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
    239   %keep = icmp ne <16 x i32> %mask1, zeroinitializer  ; true in lanes where %mask1 is non-zero
    240   %sum = add <16 x i32> %i, %j
    241   %res = select <16 x i1> %keep, <16 x i32> %sum, <16 x i32> zeroinitializer  ; other lanes become zero
    242   ret <16 x i32> %res
    243 }
    244 
    245 ; CHECK-LABEL: vpaddd_mask_fold_test
    246 ; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
    247 ; CHECK: ret
    248 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {  ; not readnone: the body loads through %j.ptr
    249   %keep = icmp ne <16 x i32> %mask1, zeroinitializer  ; true in lanes where %mask1 is non-zero
    250   %rhs = load <16 x i32>* %j.ptr  ; second addend comes from memory
    251   %sum = add <16 x i32> %i, %rhs
    252   %res = select <16 x i1> %keep, <16 x i32> %sum, <16 x i32> %i  ; other lanes pass %i through
    253   ret <16 x i32> %res
    254 }
    255 
    256 ; CHECK-LABEL: vpaddd_mask_broadcast_test
    257 ; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
    258 ; CHECK: ret
    259 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
    260   %keep = icmp ne <16 x i32> %mask1, zeroinitializer  ; true in lanes where %mask1 is non-zero
    261   %sum = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    262   %res = select <16 x i1> %keep, <16 x i32> %sum, <16 x i32> %i  ; other lanes pass %i through
    263   ret <16 x i32> %res
    264 }
    265 
    266 ; CHECK-LABEL: vpaddd_maskz_fold_test
    267 ; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
    268 ; CHECK: ret
    269 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {  ; not readnone: the body loads through %j.ptr
    270   %keep = icmp ne <16 x i32> %mask1, zeroinitializer  ; true in lanes where %mask1 is non-zero
    271   %rhs = load <16 x i32>* %j.ptr  ; second addend comes from memory
    272   %sum = add <16 x i32> %i, %rhs
    273   %res = select <16 x i1> %keep, <16 x i32> %sum, <16 x i32> zeroinitializer  ; other lanes become zero
    274   ret <16 x i32> %res
    275 }
    276 
    277 ; CHECK-LABEL: vpaddd_maskz_broadcast_test
    278 ; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
    279 ; CHECK: ret
    280 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
    281   %keep = icmp ne <16 x i32> %mask1, zeroinitializer  ; true in lanes where %mask1 is non-zero
    282   %sum = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    283   %res = select <16 x i1> %keep, <16 x i32> %sum, <16 x i32> zeroinitializer  ; other lanes become zero
    284   ret <16 x i32> %res
    285 }
    286 
    287 ; CHECK-LABEL: vpsubq_test
    288 ; CHECK: vpsubq %zmm
    289 ; CHECK: ret
    290 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {  ; <8 x i64> integer subtract
    291   %diff = sub <8 x i64> %i, %j  ; element-wise %i - %j
    292   ret <8 x i64> %diff
    293 }
    294 
    295 ; CHECK-LABEL: vpsubd_test
    296 ; CHECK: vpsubd
    297 ; CHECK: ret
    298 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {  ; <16 x i32> integer subtract
    299   %diff = sub <16 x i32> %i, %j  ; element-wise %i - %j
    300   ret <16 x i32> %diff
    301 }
    302 
    303 ; CHECK-LABEL: vpmulld_test
    304 ; CHECK: vpmulld %zmm
    305 ; CHECK: ret
    306 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {  ; <16 x i32> integer multiply
    307   %prod = mul <16 x i32> %i, %j  ; element-wise %i * %j
    308   ret <16 x i32> %prod
    309 }
    310 
    311 ; CHECK-LABEL: sqrtA
    312 ; CHECK: vsqrtss {{.*}} encoding: [0x62
    313 ; CHECK: ret
    314 declare float @sqrtf(float) readnone  ; external callee, declared readnone
    315 define float @sqrtA(float %a) nounwind uwtable readnone ssp {  ; scalar float square root via a call to @sqrtf
    316 entry:
    317   %root = tail call float @sqrtf(float %a) nounwind readnone
    318   ret float %root
    319 }
    320 
    321 ; CHECK-LABEL: sqrtB
    322 ; CHECK: vsqrtsd {{.*}}## encoding: [0x62
    323 ; CHECK: ret
    324 declare double @sqrt(double) readnone  ; external callee, declared readnone
    325 define double @sqrtB(double %a) nounwind uwtable readnone ssp {  ; scalar double square root via a call to @sqrt
    326 entry:
    327   %root = tail call double @sqrt(double %a) nounwind readnone
    328   ret double %root
    329 }
    330 
    331 ; CHECK-LABEL: sqrtC
    332 ; CHECK: vsqrtss {{.*}}## encoding: [0x62
    333 ; CHECK: ret
    334 declare float @llvm.sqrt.f32(float)  ; scalar float sqrt intrinsic
    335 define float @sqrtC(float %a) nounwind {  ; scalar float square root via the intrinsic
    336   %root = call float @llvm.sqrt.f32(float %a)
    337   ret float %root
    338 }
    339 
    340 ; CHECK-LABEL: sqrtD
    341 ; CHECK: vsqrtps {{.*}}
    342 ; CHECK: ret
    343 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)  ; <16 x float> sqrt intrinsic
    344 define <16 x float> @sqrtD(<16 x float> %a) nounwind {  ; vector float square root via the intrinsic
    345   %root = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
    346   ret <16 x float> %root
    347 }
    348 
    349 ; CHECK-LABEL: sqrtE
    350 ; CHECK: vsqrtpd {{.*}}
    351 ; CHECK: ret
    352 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)  ; <8 x double> sqrt intrinsic
    353 define <8 x double> @sqrtE(<8 x double> %a) nounwind {  ; vector double square root via the intrinsic
    354   %root = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
    355   ret <8 x double> %root
    356 }
    357 
    358 ; CHECK-LABEL: fadd_broadcast
    359 ; CHECK: LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
    360 ; CHECK: ret
    361 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {  ; add a constant vector whose 16 lanes hold the same float
    362   %sum = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
    363   ret <16 x float> %sum
    364 }
    365 
    366 ; CHECK-LABEL: addq_broadcast
    367 ; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
    368 ; CHECK: ret
    369 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {  ; add a constant vector whose 8 lanes are all i64 2
    370   %sum = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
    371   ret <8 x i64> %sum
    372 }
    373 
    374 ; CHECK-LABEL: orq_broadcast
    375 ; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
    376 ; CHECK: ret
    377 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {  ; bitwise OR with a constant vector whose 8 lanes are all i64 2
    378   %bits = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
    379   ret <8 x i64> %bits
    380 }
    381 
    382 ; CHECK-LABEL: andd512fold
    383 ; CHECK: vpandd (%
    384 ; CHECK: ret
    385 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {  ; bitwise AND of %y with a vector loaded through %x
    386 entry:
    387   %rhs = load <16 x i32>* %x, align 4  ; second operand comes from memory
    388   %masked = and <16 x i32> %y, %rhs
    389   ret <16 x i32> %masked
    390 }
    391 
    392 ; CHECK-LABEL: andqbrst
    393 ; CHECK: vpandq  (%rdi){1to8}, %zmm
    394 ; CHECK: ret
    395 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {  ; bitwise AND of %p1 with one loaded i64 replicated to all 8 lanes
    396 entry:
    397   %scalar = load i64* %ap, align 8
    398   %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0  ; place the scalar in lane 0
    399   %splat = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> zeroinitializer  ; all-zero shuffle mask copies lane 0 to every lane
    400   %masked = and <8 x i64> %p1, %splat
    401   ret <8 x i64>%masked
    402 }
    403