; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttpd2qq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttpd2uqq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2qq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2uqq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtuqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vreducepd $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreducepd $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreduceps $44, {sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreduceps $11, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vrangepd $8, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vrangepd $4, {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangeps $88, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vrangeps $4, {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}


declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovb %k0, %ecx
; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0
; CHECK-NEXT:    kmovb %k0, %eax
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}
declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)

define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1)
  %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    je LBB28_2
; CHECK-NEXT:  ## BB#1:
; CHECK-NEXT:    movb $-1, %al
; CHECK-NEXT:  LBB28_2:
; CHECK-NEXT:    vfpclasssd $4, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    testb %cl, %cl
; CHECK-NEXT:    je LBB28_4
; CHECK-NEXT:  ## BB#3:
; CHECK-NEXT:    movb $-1, %cl
; CHECK-NEXT:  LBB28_4:
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclassss $4, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    je LBB29_2
; CHECK-NEXT:  ## BB#1:
; CHECK-NEXT:    movb $-1, %al
; CHECK-NEXT:  LBB29_2:
; CHECK-NEXT:    vfpclassss $4, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    testb %cl, %cl
; CHECK-NEXT:    je LBB29_4
; CHECK-NEXT:  ## BB#3:
; CHECK-NEXT:    movb $-1, %cl
; CHECK-NEXT:  LBB29_4:
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)

define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovd2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
  ret i16 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)

define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovq2m %zmm0, %k0
; CHECK-NEXT:    kmovb %k0, %eax
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
  ret i8 %res
}

declare <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16)

define <16 x i32>@test_int_x86_avx512_cvtmask2d_512(i16 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpmovm2d %k0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16 %x0)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8)

define <8 x i64>@test_int_x86_avx512_cvtmask2q_512(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k0
; CHECK-NEXT:    vpmovm2q %k0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8 %x0)
  ret <8 x i64> %res
}

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}

declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 %mask)
  %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res1, %res2
  %res5 = fadd <8 x double> %res3, %res4
  ret <8 x double> %res5
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}

declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask)
  %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res1, %res2
  %res5 = add <8 x i64> %res3, %res4
  ret <8 x i64> %res5
}