; Regression test for AVX-512 DQ masked intrinsics (rounding conversions,
; reduce/range, fpclass, extract/insert, broadcast32x2) on the knl+avx512dq
; target. Checks both the masked ({%k1}) and unmasked forms, and the
; embedded-rounding ({rn-sae}/{ru-sae}/{sae}) encodings.
; NOTE(review): seven check directives were misspelled "CKECK" and therefore
; silently skipped by FileCheck; they are corrected to "CHECK" below.
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtpd2qq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtpd2uqq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtps2qq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtps2uqq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducepd {{.*}}{%k1}
; CHECK: vreducepd
; CHECK: {sae}
define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
  %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreduceps
; CHECK: {sae}
; CHECK: {%k1}
; CHECK: vreduceps
define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
  %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangepd
; CHECK: {%k1}
; CHECK: vrangepd
; CHECK: {sae}
define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
  %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)

; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangeps
; CHECK: {%k1}
; CHECK: vrangeps
; CHECK: {sae}
define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
  %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ss
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducess
; CHECK: {%k1}
; CHECK: vreducess
; CHECK: {sae}
define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_ss
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangess
; CHECK: {sae}
; CHECK: {%k1}
; CHECK: vrangess
; CHECK: {sae}
define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducesd
; CHECK: {%k1}
; CHECK: vreducesd
; CHECK: {sae}
define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangesd
; CHECK: {%k1}
; CHECK: vrangesd
; CHECK: {sae}
define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
  %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}


declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vfpclasspd
; CHECK: {%k1}
; CHECK: vfpclasspd
; CHECK: kmovb %k0
define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}
declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)

; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vfpclassps
; CHECK: vfpclassps
; CHECK: {%k1}
; CHECK: kmov
define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
  %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1)
  %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vfpclasssd
; CHECK: %k0 {%k1}
; CHECK: vfpclasssd
; CHECK: %k0
define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ss
; CHECK-NOT: call
; CHECK: kmovw
; CHECK: vfpclassss
; CHECK: %k0
; CHECK: {%k1}
; CHECK: kmovw
; CHECK: vfpclassss
; CHECK: %k0
define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm0
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}