; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL


define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rsqrt14_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}

define <4 x float> @test_rsqrt14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; CHECK-LABEL: test_rsqrt14_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14ss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rcp14_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}

define <4 x float> @test_rcp14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; CHECK-LABEL: test_rcp14_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14ss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) {
; CHECK-LABEL: test_rsqrt14_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}

define <2 x double> @test_rsqrt14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) {
; CHECK-LABEL: test_rsqrt14_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14sd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_rcp14_sd(<2 x double> %a0) {
; CHECK-LABEL: test_rcp14_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}

define <2 x double> @test_rcp14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) {
; CHECK-LABEL: test_rcp14_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14sd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; SKX-LABEL: test_int_x86_avx512_mask_scalef_ss:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
; SKX-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test_int_x86_avx512_mask_scalef_ss:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; KNL-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

define <4 x float>@test_int_x86_avx512_mask_scalef_ss_load(<4 x float> %x0, <4 x float>* %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vscalefss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <4 x float>, <4 x float>* %x1ptr
  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; SKX-LABEL: test_int_x86_avx512_mask_scalef_sd:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
; SKX-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test_int_x86_avx512_mask_scalef_sd:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; KNL-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

define <2 x double>@test_int_x86_avx512_mask_scalef_sd_load(<2 x double> %x0, <2 x double>* %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vscalefsd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <2 x double>, <2 x double>* %x1ptr
  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> undef, i8 -1, i32 4)
  ret <2 x double> %res
}