; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL


      6 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
      7 ; CHECK-LABEL: test_rsqrt14_ss:
      8 ; CHECK:       ## %bb.0:
      9 ; CHECK-NEXT:    vrsqrt14ss %xmm0, %xmm0, %xmm0
     10 ; CHECK-NEXT:    retq
     11     %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
     12     ret <4 x float> %res
     13 }
     15 define <4 x float> @test_rsqrt14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
     16 ; CHECK-LABEL: test_rsqrt14_ss_load:
     17 ; CHECK:       ## %bb.0:
     18 ; CHECK-NEXT:    vrsqrt14ss (%rdi), %xmm0, %xmm0
     19 ; CHECK-NEXT:    retq
     20   %a1 = load <4 x float>, <4 x float>* %a1ptr
     21   %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ;
     22   ret <4 x float> %res
     23 }
     24 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
     26 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
     27 ; CHECK-LABEL: test_rcp14_ss:
     28 ; CHECK:       ## %bb.0:
     29 ; CHECK-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
     30 ; CHECK-NEXT:    retq
     31     %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
     32     ret <4 x float> %res
     33 }
     35 define <4 x float> @test_rcp14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
     36 ; CHECK-LABEL: test_rcp14_ss_load:
     37 ; CHECK:       ## %bb.0:
     38 ; CHECK-NEXT:    vrcp14ss (%rdi), %xmm0, %xmm0
     39 ; CHECK-NEXT:    retq
     40   %a1 = load <4 x float>, <4 x float>* %a1ptr
     41   %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ;
     42   ret <4 x float> %res
     43 }
     44 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
     46 define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) {
     47 ; CHECK-LABEL: test_rsqrt14_sd:
     48 ; CHECK:       ## %bb.0:
     49 ; CHECK-NEXT:    vrsqrt14sd %xmm0, %xmm0, %xmm0
     50 ; CHECK-NEXT:    retq
     51     %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
     52     ret <2 x double> %res
     53 }
     55 define <2 x double> @test_rsqrt14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) {
     56 ; CHECK-LABEL: test_rsqrt14_sd_load:
     57 ; CHECK:       ## %bb.0:
     58 ; CHECK-NEXT:    vrsqrt14sd (%rdi), %xmm0, %xmm0
     59 ; CHECK-NEXT:    retq
     60   %a1 = load <2 x double>, <2 x double>* %a1ptr
     61   %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ;
     62   ret <2 x double> %res
     63 }
     64 declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
     66 define <2 x double> @test_rcp14_sd(<2 x double> %a0) {
     67 ; CHECK-LABEL: test_rcp14_sd:
     68 ; CHECK:       ## %bb.0:
     69 ; CHECK-NEXT:    vrcp14sd %xmm0, %xmm0, %xmm0
     70 ; CHECK-NEXT:    retq
     71     %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
     72     ret <2 x double> %res
     73 
     74 }
     76 define <2 x double> @test_rcp14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) {
     77 ; CHECK-LABEL: test_rcp14_sd_load:
     78 ; CHECK:       ## %bb.0:
     79 ; CHECK-NEXT:    vrcp14sd (%rdi), %xmm0, %xmm0
     80 ; CHECK-NEXT:    retq
     81   %a1 = load <2 x double>, <2 x double>* %a1ptr
     82   %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ;
     83   ret <2 x double> %res
     84 }
     85 declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
     87 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
     88 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
     89 ; SKX-LABEL: test_int_x86_avx512_mask_scalef_ss:
     90 ; SKX:       ## %bb.0:
     91 ; SKX-NEXT:    kmovd %edi, %k1
     92 ; SKX-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1}
     93 ; SKX-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
     94 ; SKX-NEXT:    vaddps %xmm0, %xmm2, %xmm0
     95 ; SKX-NEXT:    retq
     96 ;
     97 ; KNL-LABEL: test_int_x86_avx512_mask_scalef_ss:
     98 ; KNL:       ## %bb.0:
     99 ; KNL-NEXT:    kmovw %edi, %k1
    100 ; KNL-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1}
    101 ; KNL-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
    102 ; KNL-NEXT:    vaddps %xmm0, %xmm2, %xmm0
    103 ; KNL-NEXT:    retq
    104     %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
    105     %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
    106     %res2 = fadd <4 x float> %res, %res1
    107     ret <4 x float> %res2
    108 }
    110 define <4 x float>@test_int_x86_avx512_mask_scalef_ss_load(<4 x float> %x0, <4 x float>* %x1ptr) {
    111 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss_load:
    112 ; CHECK:       ## %bb.0:
    113 ; CHECK-NEXT:    vscalefss (%rdi), %xmm0, %xmm0
    114 ; CHECK-NEXT:    retq
    115   %x1 = load <4 x float>, <4 x float>* %x1ptr
    116   %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> undef, i8 -1, i32 4)
    117   ret <4 x float> %res
    118 }
    120 declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
    121 define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
    122 ; SKX-LABEL: test_int_x86_avx512_mask_scalef_sd:
    123 ; SKX:       ## %bb.0:
    124 ; SKX-NEXT:    kmovd %edi, %k1
    125 ; SKX-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
    126 ; SKX-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
    127 ; SKX-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
    128 ; SKX-NEXT:    retq
    129 ;
    130 ; KNL-LABEL: test_int_x86_avx512_mask_scalef_sd:
    131 ; KNL:       ## %bb.0:
    132 ; KNL-NEXT:    kmovw %edi, %k1
    133 ; KNL-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
    134 ; KNL-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
    135 ; KNL-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
    136 ; KNL-NEXT:    retq
    137     %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
    138     %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
    139     %res2 = fadd <2 x double> %res, %res1
    140     ret <2 x double> %res2
    141 }
    143 define <2 x double>@test_int_x86_avx512_mask_scalef_sd_load(<2 x double> %x0, <2 x double>* %x1ptr) {
    144 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd_load:
    145 ; CHECK:       ## %bb.0:
    146 ; CHECK-NEXT:    vscalefsd (%rdi), %xmm0, %xmm0
    147 ; CHECK-NEXT:    retq
    148   %x1 = load <2 x double>, <2 x double>* %x1ptr
    149   %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> undef, i8 -1, i32 4)
    150   ret <2 x double> %res
    151 }