; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s

declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_ps_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_ps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_pd_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_pd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmsubps_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_ps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmsubpd_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_pd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubps_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubpd_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub_pd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132pd {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132pd {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub231pd {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsubadd231pd {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsubadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsub231pd {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}


define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub231pd {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}