; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl| FileCheck %s

; Codegen test for vector integer and floating-point arithmetic compiled with
; -mcpu=knl -mattr=+avx512vl. Each function is a minimal IR pattern paired with
; FileCheck patterns for the instruction it is expected to select:
;   * plain register forms (add/sub/mul/fadd ...),
;   * "fold" forms, where a vector load is used directly as a memory operand,
;   * "broadcast" forms, where a splat constant or a splatted scalar load is
;     expected to appear as an embedded {1toN} broadcast operand,
;   * "mask" forms (icmp ne + select with a pass-through value), expected to
;     use a {%k} write-mask, and "maskz" forms (select against
;     zeroinitializer), expected to use {%k} {z}.
; The first half covers 256-bit (%ymm) vectors, the second half 128-bit (%xmm).
;
; Functions that load from memory deliberately do not carry 'readnone': that
; attribute promises the function does not dereference its pointer arguments.

; 256-bit

; --- 256-bit integer add: register, load-fold and broadcast forms ---

; CHECK-LABEL: vpaddq256_test
; CHECK: vpaddq %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK-LABEL: vpaddq256_fold_test
; CHECK: vpaddq (%rdi), %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, <4 x i64>* %j) nounwind {
  %tmp = load <4 x i64>, <4 x i64>* %j, align 4
  %x = add <4 x i64> %i, %tmp
  ret <4 x i64> %x
}

; CHECK-LABEL: vpaddq256_broadcast_test
; CHECK: vpaddq LCP{{.*}}(%rip){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
  %x = add <4 x i64> %i, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %x
}

; The scalar load is splatted with insertelement + shufflevector (all-zero
; shuffle mask) before the add.
; CHECK-LABEL: vpaddq256_broadcast2_test
; CHECK: vpaddq (%rdi){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, i64* %j.ptr) nounwind {
  %j = load i64, i64* %j.ptr
  %j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
  %j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
  %x = add <4 x i64> %i, %j.v
  ret <4 x i64> %x
}

; CHECK-LABEL: vpaddd256_test
; CHECK: vpaddd %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK-LABEL: vpaddd256_fold_test
; CHECK: vpaddd (%rdi), %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, <8 x i32>* %j) nounwind {
  %tmp = load <8 x i32>, <8 x i32>* %j, align 4
  %x = add <8 x i32> %i, %tmp
  ret <8 x i32> %x
}

; CHECK-LABEL: vpaddd256_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_broadcast_test(<8 x i32> %i) nounwind {
  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %x
}

; --- 256-bit masked integer add ---
; The mask is computed as (%mask1 != 0); the select chooses between the sum
; and either %i (mask form) or zero (maskz form).

; CHECK-LABEL: vpaddd256_mask_test
; CHECK: vpaddd %ymm{{.*%k[1-7].*}}
; CHECK: ret
define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

; CHECK-LABEL: vpaddd256_maskz_test
; CHECK: vpaddd %ymm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; Loads through %j.ptr, so the function is not marked readnone.
; CHECK-LABEL: vpaddd256_mask_fold_test
; CHECK: vpaddd (%rdi), %ymm{{.*%k[1-7]}}
; CHECK: ret
define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %j = load <8 x i32>, <8 x i32>* %j.ptr
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

; CHECK-LABEL: vpaddd256_mask_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]}}}
; CHECK: ret
define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

; Loads through %j.ptr, so the function is not marked readnone.
; CHECK-LABEL: vpaddd256_maskz_fold_test
; CHECK: vpaddd (%rdi), %ymm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %j = load <8 x i32>, <8 x i32>* %j.ptr
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; CHECK-LABEL: vpaddd256_maskz_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; --- 256-bit integer sub / mul ---

; CHECK-LABEL: vpsubq256_test
; CHECK: vpsubq %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpsubq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK-LABEL: vpsubd256_test
; CHECK: vpsubd %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpsubd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK-LABEL: vpmulld256_test
; CHECK: vpmulld %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; --- 256-bit floating-point add: register, constant-pool and broadcast forms ---

; CHECK-LABEL: test_vaddpd_256
; CHECK: vaddpd{{.*}}
; CHECK: ret
define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
entry:
  %add.i = fadd <4 x double> %x, %y
  ret <4 x double> %add.i
}

; The constant is not a splat, so the pattern only requires a constant-pool
; memory operand, not a broadcast.
; CHECK-LABEL: test_fold_vaddpd_256
; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
; CHECK: ret
define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
entry:
  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
  ret <4 x double> %add.i
}

; NOTE(review): despite "vaddpd" in the name, this function operates on
; <8 x float>, and the pattern below does not pin the instruction mnemonic.
; CHECK-LABEL: test_broadcast_vaddpd_256
; CHECK: LCP{{.*}}(%rip){1to8}, %ymm0, %ymm0
; CHECK: ret
define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
  %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <8 x float> %b
}

; --- 256-bit masked single-precision arithmetic ---
; Each test merges the result into %dst under the mask (%mask1 != 0).
; min/max are expressed as fcmp olt/ogt followed by a select.

; CHECK-LABEL: test_mask_vaddps_256
; CHECK: vaddps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fadd <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; CHECK-LABEL: test_mask_vmulps_256
; CHECK: vmulps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fmul <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; CHECK-LABEL: test_mask_vminps_256
; CHECK: vminps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x float> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
  %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
  ret <8 x float> %r
}

; CHECK-LABEL: test_mask_vmaxps_256
; CHECK: vmaxps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x float> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
  %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
  ret <8 x float> %r
}

; CHECK-LABEL: test_mask_vsubps_256
; CHECK: vsubps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fsub <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; CHECK-LABEL: test_mask_vdivps_256
; CHECK: vdivps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fdiv <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; --- 256-bit masked double-precision arithmetic ---

; CHECK-LABEL: test_mask_vmulpd_256
; CHECK: vmulpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i,
                                         <4 x double> %j, <4 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fmul <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_vminpd_256
; CHECK: vminpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i,
                                         <4 x double> %j, <4 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp olt <4 x double> %i, %j
  %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
  %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_vmaxpd_256
; CHECK: vmaxpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i,
                                         <4 x double> %j, <4 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <4 x double> %i, %j
  %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
  %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_vsubpd_256
; CHECK: vsubpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i,
                                         <4 x double> %j, <4 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fsub <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_vdivpd_256
; CHECK: vdivpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i,
                                         <4 x double> %j, <4 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fdiv <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_vaddpd_256
; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i,
                                         <4 x double> %j, <4 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fadd <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_maskz_vaddpd_256
; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j,
                                          <4 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fadd <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; --- 256-bit masked double-precision add with memory / broadcast operands ---

; CHECK-LABEL: test_mask_fold_vaddpd_256
; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i,
                                              <4 x double>* %j, <4 x i64> %mask1)
                                              nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load <4 x double>, <4 x double>* %j
  %x = fadd <4 x double> %i, %tmp
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_maskz_fold_vaddpd_256
; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j,
                                               <4 x i64> %mask1) nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load <4 x double>, <4 x double>* %j
  %x = fadd <4 x double> %i, %tmp
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; CHECK-LABEL: test_broadcast2_vaddpd_256
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
  %tmp = load double, double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef,
                     <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  ret <4 x double> %x
}

; NOTE(review): %dst is unused here; the select passes %i through on masked-off
; lanes. Confirm whether %dst was the intended pass-through value.
; CHECK-LABEL: test_mask_broadcast_vaddpd_256
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]}.*}}
; CHECK: ret
define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i,
                                                   double* %j, <4 x i64> %mask1) nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef,
                     <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
  ret <4 x double> %r
}

; CHECK-LABEL: test_maskz_broadcast_vaddpd_256
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j,
                                                    <4 x i64> %mask1) nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef,
                     <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; 128-bit

; --- 128-bit integer add: register, load-fold and broadcast forms ---

; CHECK-LABEL: vpaddq128_test
; CHECK: vpaddq %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

; CHECK-LABEL: vpaddq128_fold_test
; CHECK: vpaddq (%rdi), %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
  %tmp = load <2 x i64>, <2 x i64>* %j, align 4
  %x = add <2 x i64> %i, %tmp
  ret <2 x i64> %x
}

; The splat is built with two insertelement instructions rather than a shuffle.
; CHECK-LABEL: vpaddq128_broadcast2_test
; CHECK: vpaddq (%rdi){1to2}, %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, i64* %j) nounwind {
  %tmp = load i64, i64* %j
  %j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
  %x = add <2 x i64> %i, %j.1
  ret <2 x i64> %x
}

; CHECK-LABEL: vpaddd128_test
; CHECK: vpaddd %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

; CHECK-LABEL: vpaddd128_fold_test
; CHECK: vpaddd (%rdi), %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, <4 x i32>* %j) nounwind {
  %tmp = load <4 x i32>, <4 x i32>* %j, align 4
  %x = add <4 x i32> %i, %tmp
  ret <4 x i32> %x
}

; CHECK-LABEL: vpaddd128_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_broadcast_test(<4 x i32> %i) nounwind {
  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %x
}

; --- 128-bit masked integer add ---

; CHECK-LABEL: vpaddd128_mask_test
; CHECK: vpaddd %xmm{{.*%k[1-7].*}}
; CHECK: ret
define <4 x i32> @vpaddd128_mask_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

; CHECK-LABEL: vpaddd128_maskz_test
; CHECK: vpaddd %xmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; Loads through %j.ptr, so the function is not marked readnone.
; CHECK-LABEL: vpaddd128_mask_fold_test
; CHECK: vpaddd (%rdi), %xmm{{.*%k[1-7]}}
; CHECK: ret
define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %j = load <4 x i32>, <4 x i32>* %j.ptr
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

; CHECK-LABEL: vpaddd128_mask_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]}}}
; CHECK: ret
define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

; Loads through %j.ptr, so the function is not marked readnone.
; CHECK-LABEL: vpaddd128_maskz_fold_test
; CHECK: vpaddd (%rdi), %xmm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %j = load <4 x i32>, <4 x i32>* %j.ptr
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; CHECK-LABEL: vpaddd128_maskz_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <4 x i32> @vpaddd128_maskz_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; --- 128-bit integer sub / mul ---

; CHECK-LABEL: vpsubq128_test
; CHECK: vpsubq %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpsubq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

; CHECK-LABEL: vpsubd128_test
; CHECK: vpsubd %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpsubd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

; CHECK-LABEL: vpmulld128_test
; CHECK: vpmulld %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
  %x = mul <4 x i32> %i, %j
  ret <4 x i32> %x
}

; --- 128-bit floating-point add: register, constant-pool and broadcast forms ---

; CHECK-LABEL: test_vaddpd_128
; CHECK: vaddpd{{.*}}
; CHECK: ret
define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
entry:
  %add.i = fadd <2 x double> %x, %y
  ret <2 x double> %add.i
}

; CHECK-LABEL: test_fold_vaddpd_128
; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
; CHECK: ret
define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
entry:
  %add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
  ret <2 x double> %add.i
}

; NOTE(review): despite "vaddpd" in the name, this function operates on
; <4 x float>, and the pattern below does not pin the instruction mnemonic.
; CHECK-LABEL: test_broadcast_vaddpd_128
; CHECK: LCP{{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK: ret
define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
  %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <4 x float> %b
}

; --- 128-bit masked single-precision arithmetic ---

; CHECK-LABEL: test_mask_vaddps_128
; CHECK: vaddps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fadd <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

; CHECK-LABEL: test_mask_vmulps_128
; CHECK: vmulps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fmul <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

; CHECK-LABEL: test_mask_vminps_128
; CHECK: vminps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <4 x float> %i, %j
  %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
  %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
  ret <4 x float> %r
}

; CHECK-LABEL: test_mask_vmaxps_128
; CHECK: vmaxps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <4 x float> %i, %j
  %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
  %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
  ret <4 x float> %r
}

; CHECK-LABEL: test_mask_vsubps_128
; CHECK: vsubps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fsub <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}


; CHECK-LABEL: test_mask_vdivps_128
; CHECK: vdivps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fdiv <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

; --- 128-bit masked double-precision arithmetic ---

; CHECK-LABEL: test_mask_vmulpd_128
; CHECK: vmulpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i,
                                         <2 x double> %j, <2 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fmul <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_vminpd_128
; CHECK: vminpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i,
                                         <2 x double> %j, <2 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp olt <2 x double> %i, %j
  %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
  %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_vmaxpd_128
; CHECK: vmaxpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i,
                                         <2 x double> %j, <2 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <2 x double> %i, %j
  %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
  %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_vsubpd_128
; CHECK: vsubpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i,
                                         <2 x double> %j, <2 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fsub <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_vdivpd_128
; CHECK: vdivpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i,
                                         <2 x double> %j, <2 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fdiv <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_vaddpd_128
; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i,
                                         <2 x double> %j, <2 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fadd <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_maskz_vaddpd_128
; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
                                          <2 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fadd <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}

; --- 128-bit masked double-precision add with memory / broadcast operands ---

; CHECK-LABEL: test_mask_fold_vaddpd_128
; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i,
                                              <2 x double>* %j, <2 x i64> %mask1)
                                              nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load <2 x double>, <2 x double>* %j
  %x = fadd <2 x double> %i, %tmp
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_maskz_fold_vaddpd_128
; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j,
                                               <2 x i64> %mask1) nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load <2 x double>, <2 x double>* %j
  %x = fadd <2 x double> %i, %tmp
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}

; CHECK-LABEL: test_broadcast2_vaddpd_128
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}}
; CHECK: ret
define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind {
  %tmp = load double, double* %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  ret <2 x double> %x
}

; NOTE(review): %dst is unused here; the select passes %i through on masked-off
; lanes. Confirm whether %dst was the intended pass-through value.
; CHECK-LABEL: test_mask_broadcast_vaddpd_128
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]}.*}}
; CHECK: ret
define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i,
                                                   double* %j, <2 x i64> %mask1)
                                                   nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i
  ret <2 x double> %r
}

; CHECK-LABEL: test_maskz_broadcast_vaddpd_128
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j,
                                                    <2 x i64> %mask1) nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}