; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s

; Codegen tests for the 512-bit AVX-512 FMA intrinsics. Each test calls one
; intrinsic and verifies the instruction that llc prints for it.
; In the calls below the fourth operand is the write mask (i16/i8; -1 means all
; lanes enabled) and the fifth is an i32 rounding selector.

declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; vfnmadd, packed single, all-ones mask.
define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_x86_vfnmadd_ps_z:
  ; CHECK: vfnmadd213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

; vfnmadd, packed single, caller-supplied mask.
define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_vfnmadd_ps:
  ; CHECK: vfnmadd213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

; vfnmadd, packed double, all-ones mask.
define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_x86_vfnmadd_pd_z:
  ; CHECK: vfnmadd213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

; vfnmadd, packed double, caller-supplied mask.
define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_vfnmadd_pd:
  ; CHECK: vfnmadd213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; ---- vfnmsub: basic selection tests ----------------------------------------
; The fourth call operand is the write mask (-1 means all lanes enabled); the
; fifth is an i32 rounding selector.

define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_x86_vfnmsubps_z:
  ; CHECK: vfnmsub213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_vfnmsub_ps:
  ; CHECK: vfnmsub213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_x86_vfnmsubpd_z:
  ; CHECK: vfnmsub213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_vfnmsub_pd:
  ; CHECK: vfnmsub213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; ---- vfmaddsub: basic selection tests --------------------------------------

define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_x86_vfmaddsubps_z:
  ; CHECK: vfmaddsub213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa6,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_x86_vfmaddsubpd_z:
  ; CHECK: vfmaddsub213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_vfmaddsub_pd:
  ; CHECK: vfmaddsub213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; ---- vfmaddsub: mask / mask3 / maskz full-sequence tests -------------------
; Each test_int_* function issues the intrinsic twice -- once with the incoming
; mask and rounding selector 4, once with an all-ones mask and selector 0 --
; then adds the two results. The expected assembly is matched line by line.

define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

; ---- vfmsubadd: mask3 full-sequence tests ----------------------------------

declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

; ---- vfmadd, packed single: rounding-selector tests ------------------------
; As the expected lines below show, the i32 selector maps to the printed
; rounding annotation: 0 -> {rn-sae}, 1 -> {rd-sae}, 2 -> {ru-sae},
; 3 -> {rz-sae}, 4 -> no annotation. The "rrb" variants pass a mask and expect
; {%k1}; the "rrbz" variants pass -1 and expect no mask register.

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
  ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
  ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
  ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
  ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
  ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
  ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
  ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
  ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
  ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
  ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

; ---- vfmsub: mask3 full-sequence tests -------------------------------------

declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

; ---- vfmadd, packed double: rounding-selector tests ------------------------

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
  ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
  ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
  ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
  ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
  ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
  ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
  ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
  ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
  ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
  ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

; ---- vfmadd: mask / mask3 / maskz full-sequence tests ----------------------

define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}


; ---- vfnmsub, packed double: rounding-selector tests -----------------------

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
  ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
  ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
  ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
  ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
  ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
  ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
  ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
  ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
  ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 
x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { 574 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current 575 ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2] 576 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind 577 ret <8 x double> %res 578 } 579 580 define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ 581 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512: 582 ; CHECK: ## BB#0: 583 ; CHECK-NEXT: movzbl %dil, %eax 584 ; CHECK-NEXT: kmovw %eax, %k1 585 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 586 ; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1} 587 ; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 588 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 589 ; CHECK-NEXT: retq 590 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) 591 %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) 592 %res2 = fadd <8 x double> %res, %res1 593 ret <8 x double> %res2 594 } 595 596 declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) 597 598 define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ 599 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512: 600 ; CHECK: ## BB#0: 601 ; CHECK-NEXT: movzbl %dil, %eax 602 ; CHECK-NEXT: kmovw %eax, %k1 603 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 604 ; CHECK-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1} 605 ; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 606 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 607 ; CHECK-NEXT: retq 608 %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> 
%x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) 609 %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) 610 %res2 = fadd <8 x double> %res, %res1 611 ret <8 x double> %res2 612 } 613 614 define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ 615 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512: 616 ; CHECK: ## BB#0: 617 ; CHECK-NEXT: kmovw %edi, %k1 618 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 619 ; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1} 620 ; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 621 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 622 ; CHECK-NEXT: retq 623 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) 624 %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) 625 %res2 = fadd <16 x float> %res, %res1 626 ret <16 x float> %res2 627 } 628 629 declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 630 631 define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ 632 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512: 633 ; CHECK: ## BB#0: 634 ; CHECK-NEXT: kmovw %edi, %k1 635 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 636 ; CHECK-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1} 637 ; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 638 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 639 ; CHECK-NEXT: retq 640 %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) 641 %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) 642 %res2 = fadd <16 x float> %res, %res1 
643 ret <16 x float> %res2 644 } 645 646 define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ 647 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512: 648 ; CHECK: ## BB#0: 649 ; CHECK-NEXT: movzbl %dil, %eax 650 ; CHECK-NEXT: kmovw %eax, %k1 651 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 652 ; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1} 653 ; CHECK-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 654 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 655 ; CHECK-NEXT: retq 656 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) 657 %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) 658 %res2 = fadd <8 x double> %res, %res1 659 ret <8 x double> %res2 660 } 661 662 define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ 663 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512: 664 ; CHECK: ## BB#0: 665 ; CHECK-NEXT: kmovw %edi, %k1 666 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 667 ; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1} 668 ; CHECK-NEXT: vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 669 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 670 ; CHECK-NEXT: retq 671 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) 672 %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) 673 %res2 = fadd <16 x float> %res, %res1 674 ret <16 x float> %res2 675 } 676