; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s

declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_ps_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_pd_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmsubps_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmsubpd_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubps_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubpd_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}


define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}
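; ----------------------------------------------------------------------------
; Illustrative note, not part of the autogenerated assertions above: the
; trailing i32 argument of these intrinsics selects the rounding mode, as the
; *_rrb_* tests exercise: 0 -> {rn-sae}, 1 -> {rd-sae}, 2 -> {ru-sae},
; 3 -> {rz-sae}, and 4 -> the current (MXCSR) rounding mode with no static
; rounding operand in the emitted instruction. The function below is a
; hypothetical sketch without FileCheck assertions; it illustrates how the
; "mask" flavour merges under the mask, with inactive lanes keeping the first
; source operand, matching the merge-masked
; vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} patterns above.

declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)

define <8 x double> @example_masked_fma_merge(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
  ; Plain fused multiply-add in the default (current) rounding mode.
  %fma = call <8 x double> @llvm.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c)
  ; Reinterpret the 8-bit mask as one bit per lane.
  %m = bitcast i8 %mask to <8 x i1>
  ; Merge: active lanes take the fma result, inactive lanes keep %a.
  %res = select <8 x i1> %m, <8 x double> %fma, <8 x double> %a
  ret <8 x double> %res
}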