; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s

; Vector move selection with AVX512VL enabled.
;
; Every function below reinterprets an i8* argument as a pointer to a 256-bit
; or 128-bit vector and performs a single load or store through it. The
; FileCheck directives in front of each function name the move mnemonic that
; is expected in the llc output for that element type and alignment:
;   * natural alignment (32 bytes for 256-bit, 16 bytes for 128-bit) expects
;     the aligned form (vmovdqa32/vmovdqa64/vmovaps/vmovapd);
;   * "align 1" expects the unaligned form (vmovdqu32/vmovdqu64/vmovups/vmovupd).
; For the floating-point moves the directive additionally requires the printed
; encoding to start with byte 0x62 (the EVEX prefix byte).
; The masked tests select between the loaded vector and either a pass-through
; value or zero, and expect the move to carry a {%k1}..{%k7} mask operand,
; with {z} when the alternative is zeroinitializer.

;===----------------------------------------------------------------------===;
; 256-bit vectors: plain integer loads and stores
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test_256_1
; CHECK: vmovdqu32
; CHECK: ret
define <8 x i32> @test_256_1(i8* %addr) {
  %ptr = bitcast i8* %addr to <8 x i32>*
  %val = load <8 x i32>, <8 x i32>* %ptr, align 1
  ret <8 x i32> %val
}

; CHECK-LABEL: test_256_2
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test_256_2(i8* %addr) {
  %ptr = bitcast i8* %addr to <8 x i32>*
  %val = load <8 x i32>, <8 x i32>* %ptr, align 32
  ret <8 x i32> %val
}

; CHECK-LABEL: test_256_3
; CHECK: vmovdqa64
; CHECK: ret
define void @test_256_3(i8* %addr, <4 x i64> %data) {
  %ptr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64> %data, <4 x i64>* %ptr, align 32
  ret void
}

; CHECK-LABEL: test_256_4
; CHECK: vmovdqu32
; CHECK: ret
define void @test_256_4(i8* %addr, <8 x i32> %data) {
  %ptr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32> %data, <8 x i32>* %ptr, align 1
  ret void
}

; CHECK-LABEL: test_256_5
; CHECK: vmovdqa32
; CHECK: ret
define void @test_256_5(i8* %addr, <8 x i32> %data) {
  %ptr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32> %data, <8 x i32>* %ptr, align 32
  ret void
}

; CHECK-LABEL: test_256_6
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test_256_6(i8* %addr) {
  %ptr = bitcast i8* %addr to <4 x i64>*
  %val = load <4 x i64>, <4 x i64>* %ptr, align 32
  ret <4 x i64> %val
}

; CHECK-LABEL: test_256_7
; CHECK: vmovdqu64
; CHECK: ret
define void @test_256_7(i8* %addr, <4 x i64> %data) {
  %ptr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64> %data, <4 x i64>* %ptr, align 1
  ret void
}

; CHECK-LABEL: test_256_8
; CHECK: vmovdqu64
; CHECK: ret
define <4 x i64> @test_256_8(i8* %addr) {
  %ptr = bitcast i8* %addr to <4 x i64>*
  %val = load <4 x i64>, <4 x i64>* %ptr, align 1
  ret <4 x i64> %val
}

;===----------------------------------------------------------------------===;
; 256-bit vectors: plain floating-point loads and stores
; (the encoding is required to begin with 0x62)
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test_256_9
; CHECK: vmovapd {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_9(i8* %addr, <4 x double> %data) {
  %ptr = bitcast i8* %addr to <4 x double>*
  store <4 x double> %data, <4 x double>* %ptr, align 32
  ret void
}

; CHECK-LABEL: test_256_10
; CHECK: vmovapd {{.*}} ## encoding: [0x62
; CHECK: ret
define <4 x double> @test_256_10(i8* %addr) {
  %ptr = bitcast i8* %addr to <4 x double>*
  %val = load <4 x double>, <4 x double>* %ptr, align 32
  ret <4 x double> %val
}

; CHECK-LABEL: test_256_11
; CHECK: vmovaps {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_11(i8* %addr, <8 x float> %data) {
  %ptr = bitcast i8* %addr to <8 x float>*
  store <8 x float> %data, <8 x float>* %ptr, align 32
  ret void
}

; CHECK-LABEL: test_256_12
; CHECK: vmovaps {{.*}} ## encoding: [0x62
; CHECK: ret
define <8 x float> @test_256_12(i8* %addr) {
  %ptr = bitcast i8* %addr to <8 x float>*
  %val = load <8 x float>, <8 x float>* %ptr, align 32
  ret <8 x float> %val
}

; CHECK-LABEL: test_256_13
; CHECK: vmovupd {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_13(i8* %addr, <4 x double> %data) {
  %ptr = bitcast i8* %addr to <4 x double>*
  store <4 x double> %data, <4 x double>* %ptr, align 1
  ret void
}

; CHECK-LABEL: test_256_14
; CHECK: vmovupd {{.*}} ## encoding: [0x62
; CHECK: ret
define <4 x double> @test_256_14(i8* %addr) {
  %ptr = bitcast i8* %addr to <4 x double>*
  %val = load <4 x double>, <4 x double>* %ptr, align 1
  ret <4 x double> %val
}

; CHECK-LABEL: test_256_15
; CHECK: vmovups {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_15(i8* %addr, <8 x float> %data) {
  %ptr = bitcast i8* %addr to <8 x float>*
  store <8 x float> %data, <8 x float>* %ptr, align 1
  ret void
}

; CHECK-LABEL: test_256_16
; CHECK: vmovups {{.*}} ## encoding: [0x62
; CHECK: ret
define <8 x float> @test_256_16(i8* %addr) {
  %ptr = bitcast i8* %addr to <8 x float>*
  %val = load <8 x float>, <8 x float>* %ptr, align 1
  ret <8 x float> %val
}

;===----------------------------------------------------------------------===;
; 256-bit vectors: masked integer loads
; The mask is "%mask1 != 0" per lane; the select falls back to %old (merge)
; or to zeroinitializer (zeroing, expects {z}).
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test_256_17
; CHECK: vmovdqa32{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x i32> @test_256_17(i8* %addr, <8 x i32> %old, <8 x i32> %mask1) {
  %k = icmp ne <8 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <8 x i32>*
  %ld = load <8 x i32>, <8 x i32>* %ptr, align 32
  %sel = select <8 x i1> %k, <8 x i32> %ld, <8 x i32> %old
  ret <8 x i32> %sel
}

; CHECK-LABEL: test_256_18
; CHECK: vmovdqu32{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x i32> @test_256_18(i8* %addr, <8 x i32> %old, <8 x i32> %mask1) {
  %k = icmp ne <8 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <8 x i32>*
  %ld = load <8 x i32>, <8 x i32>* %ptr, align 1
  %sel = select <8 x i1> %k, <8 x i32> %ld, <8 x i32> %old
  ret <8 x i32> %sel
}

; CHECK-LABEL: test_256_19
; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x i32> @test_256_19(i8* %addr, <8 x i32> %mask1) {
  %k = icmp ne <8 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <8 x i32>*
  %ld = load <8 x i32>, <8 x i32>* %ptr, align 32
  %sel = select <8 x i1> %k, <8 x i32> %ld, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; CHECK-LABEL: test_256_20
; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x i32> @test_256_20(i8* %addr, <8 x i32> %mask1) {
  %k = icmp ne <8 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <8 x i32>*
  %ld = load <8 x i32>, <8 x i32>* %ptr, align 1
  %sel = select <8 x i1> %k, <8 x i32> %ld, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; CHECK-LABEL: test_256_21
; CHECK: vmovdqa64{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x i64> @test_256_21(i8* %addr, <4 x i64> %old, <4 x i64> %mask1) {
  %k = icmp ne <4 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x i64>*
  %ld = load <4 x i64>, <4 x i64>* %ptr, align 32
  %sel = select <4 x i1> %k, <4 x i64> %ld, <4 x i64> %old
  ret <4 x i64> %sel
}

; CHECK-LABEL: test_256_22
; CHECK: vmovdqu64{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x i64> @test_256_22(i8* %addr, <4 x i64> %old, <4 x i64> %mask1) {
  %k = icmp ne <4 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x i64>*
  %ld = load <4 x i64>, <4 x i64>* %ptr, align 1
  %sel = select <4 x i1> %k, <4 x i64> %ld, <4 x i64> %old
  ret <4 x i64> %sel
}

; CHECK-LABEL: test_256_23
; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x i64> @test_256_23(i8* %addr, <4 x i64> %mask1) {
  %k = icmp ne <4 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x i64>*
  %ld = load <4 x i64>, <4 x i64>* %ptr, align 32
  %sel = select <4 x i1> %k, <4 x i64> %ld, <4 x i64> zeroinitializer
  ret <4 x i64> %sel
}

; CHECK-LABEL: test_256_24
; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x i64> @test_256_24(i8* %addr, <4 x i64> %mask1) {
  %k = icmp ne <4 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x i64>*
  %ld = load <4 x i64>, <4 x i64>* %ptr, align 1
  %sel = select <4 x i1> %k, <4 x i64> %ld, <4 x i64> zeroinitializer
  ret <4 x i64> %sel
}

;===----------------------------------------------------------------------===;
; 256-bit vectors: masked floating-point loads
; The <8 x float> tests build the mask with "fcmp one" against zero; the
; <4 x double> tests build it with "icmp ne" on an <4 x i64> argument.
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test_256_25
; CHECK: vmovaps{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x float> @test_256_25(i8* %addr, <8 x float> %old, <8 x float> %mask1) {
  %k = fcmp one <8 x float> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <8 x float>*
  %ld = load <8 x float>, <8 x float>* %ptr, align 32
  %sel = select <8 x i1> %k, <8 x float> %ld, <8 x float> %old
  ret <8 x float> %sel
}

; CHECK-LABEL: test_256_26
; CHECK: vmovups{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x float> @test_256_26(i8* %addr, <8 x float> %old, <8 x float> %mask1) {
  %k = fcmp one <8 x float> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <8 x float>*
  %ld = load <8 x float>, <8 x float>* %ptr, align 1
  %sel = select <8 x i1> %k, <8 x float> %ld, <8 x float> %old
  ret <8 x float> %sel
}

; CHECK-LABEL: test_256_27
; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x float> @test_256_27(i8* %addr, <8 x float> %mask1) {
  %k = fcmp one <8 x float> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <8 x float>*
  %ld = load <8 x float>, <8 x float>* %ptr, align 32
  %sel = select <8 x i1> %k, <8 x float> %ld, <8 x float> zeroinitializer
  ret <8 x float> %sel
}

; CHECK-LABEL: test_256_28
; CHECK: vmovups{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x float> @test_256_28(i8* %addr, <8 x float> %mask1) {
  %k = fcmp one <8 x float> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <8 x float>*
  %ld = load <8 x float>, <8 x float>* %ptr, align 1
  %sel = select <8 x i1> %k, <8 x float> %ld, <8 x float> zeroinitializer
  ret <8 x float> %sel
}

; CHECK-LABEL: test_256_29
; CHECK: vmovapd{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x double> @test_256_29(i8* %addr, <4 x double> %old, <4 x i64> %mask1) {
  %k = icmp ne <4 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x double>*
  %ld = load <4 x double>, <4 x double>* %ptr, align 32
  %sel = select <4 x i1> %k, <4 x double> %ld, <4 x double> %old
  ret <4 x double> %sel
}

; CHECK-LABEL: test_256_30
; CHECK: vmovupd{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x double> @test_256_30(i8* %addr, <4 x double> %old, <4 x i64> %mask1) {
  %k = icmp ne <4 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x double>*
  %ld = load <4 x double>, <4 x double>* %ptr, align 1
  %sel = select <4 x i1> %k, <4 x double> %ld, <4 x double> %old
  ret <4 x double> %sel
}

; CHECK-LABEL: test_256_31
; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x double> @test_256_31(i8* %addr, <4 x i64> %mask1) {
  %k = icmp ne <4 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x double>*
  %ld = load <4 x double>, <4 x double>* %ptr, align 32
  %sel = select <4 x i1> %k, <4 x double> %ld, <4 x double> zeroinitializer
  ret <4 x double> %sel
}

; CHECK-LABEL: test_256_32
; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x double> @test_256_32(i8* %addr, <4 x i64> %mask1) {
  %k = icmp ne <4 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x double>*
  %ld = load <4 x double>, <4 x double>* %ptr, align 1
  %sel = select <4 x i1> %k, <4 x double> %ld, <4 x double> zeroinitializer
  ret <4 x double> %sel
}

;===----------------------------------------------------------------------===;
; 128-bit vectors: plain integer loads and stores
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test_128_1
; CHECK: vmovdqu32
; CHECK: ret
define <4 x i32> @test_128_1(i8* %addr) {
  %ptr = bitcast i8* %addr to <4 x i32>*
  %val = load <4 x i32>, <4 x i32>* %ptr, align 1
  ret <4 x i32> %val
}

; CHECK-LABEL: test_128_2
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test_128_2(i8* %addr) {
  %ptr = bitcast i8* %addr to <4 x i32>*
  %val = load <4 x i32>, <4 x i32>* %ptr, align 16
  ret <4 x i32> %val
}

; CHECK-LABEL: test_128_3
; CHECK: vmovdqa64
; CHECK: ret
define void @test_128_3(i8* %addr, <2 x i64> %data) {
  %ptr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64> %data, <2 x i64>* %ptr, align 16
  ret void
}

; CHECK-LABEL: test_128_4
; CHECK: vmovdqu32
; CHECK: ret
define void @test_128_4(i8* %addr, <4 x i32> %data) {
  %ptr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %data, <4 x i32>* %ptr, align 1
  ret void
}

; CHECK-LABEL: test_128_5
; CHECK: vmovdqa32
; CHECK: ret
define void @test_128_5(i8* %addr, <4 x i32> %data) {
  %ptr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %data, <4 x i32>* %ptr, align 16
  ret void
}

; CHECK-LABEL: test_128_6
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test_128_6(i8* %addr) {
  %ptr = bitcast i8* %addr to <2 x i64>*
  %val = load <2 x i64>, <2 x i64>* %ptr, align 16
  ret <2 x i64> %val
}

; CHECK-LABEL: test_128_7
; CHECK: vmovdqu64
; CHECK: ret
define void @test_128_7(i8* %addr, <2 x i64> %data) {
  %ptr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64> %data, <2 x i64>* %ptr, align 1
  ret void
}

; CHECK-LABEL: test_128_8
; CHECK: vmovdqu64
; CHECK: ret
define <2 x i64> @test_128_8(i8* %addr) {
  %ptr = bitcast i8* %addr to <2 x i64>*
  %val = load <2 x i64>, <2 x i64>* %ptr, align 1
  ret <2 x i64> %val
}

;===----------------------------------------------------------------------===;
; 128-bit vectors: plain floating-point loads and stores
; (the encoding is required to begin with 0x62)
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test_128_9
; CHECK: vmovapd {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_9(i8* %addr, <2 x double> %data) {
  %ptr = bitcast i8* %addr to <2 x double>*
  store <2 x double> %data, <2 x double>* %ptr, align 16
  ret void
}

; CHECK-LABEL: test_128_10
; CHECK: vmovapd {{.*}} ## encoding: [0x62
; CHECK: ret
define <2 x double> @test_128_10(i8* %addr) {
  %ptr = bitcast i8* %addr to <2 x double>*
  %val = load <2 x double>, <2 x double>* %ptr, align 16
  ret <2 x double> %val
}

; CHECK-LABEL: test_128_11
; CHECK: vmovaps {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_11(i8* %addr, <4 x float> %data) {
  %ptr = bitcast i8* %addr to <4 x float>*
  store <4 x float> %data, <4 x float>* %ptr, align 16
  ret void
}

; CHECK-LABEL: test_128_12
; CHECK: vmovaps {{.*}} ## encoding: [0x62
; CHECK: ret
define <4 x float> @test_128_12(i8* %addr) {
  %ptr = bitcast i8* %addr to <4 x float>*
  %val = load <4 x float>, <4 x float>* %ptr, align 16
  ret <4 x float> %val
}

; CHECK-LABEL: test_128_13
; CHECK: vmovupd {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_13(i8* %addr, <2 x double> %data) {
  %ptr = bitcast i8* %addr to <2 x double>*
  store <2 x double> %data, <2 x double>* %ptr, align 1
  ret void
}

; CHECK-LABEL: test_128_14
; CHECK: vmovupd {{.*}} ## encoding: [0x62
; CHECK: ret
define <2 x double> @test_128_14(i8* %addr) {
  %ptr = bitcast i8* %addr to <2 x double>*
  %val = load <2 x double>, <2 x double>* %ptr, align 1
  ret <2 x double> %val
}

; CHECK-LABEL: test_128_15
; CHECK: vmovups {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_15(i8* %addr, <4 x float> %data) {
  %ptr = bitcast i8* %addr to <4 x float>*
  store <4 x float> %data, <4 x float>* %ptr, align 1
  ret void
}

; CHECK-LABEL: test_128_16
; CHECK: vmovups {{.*}} ## encoding: [0x62
; CHECK: ret
define <4 x float> @test_128_16(i8* %addr) {
  %ptr = bitcast i8* %addr to <4 x float>*
  %val = load <4 x float>, <4 x float>* %ptr, align 1
  ret <4 x float> %val
}

;===----------------------------------------------------------------------===;
; 128-bit vectors: masked integer loads
; The mask is "%mask1 != 0" per lane; the select falls back to %old (merge)
; or to zeroinitializer (zeroing, expects {z}).
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test_128_17
; CHECK: vmovdqa32{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x i32> @test_128_17(i8* %addr, <4 x i32> %old, <4 x i32> %mask1) {
  %k = icmp ne <4 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x i32>*
  %ld = load <4 x i32>, <4 x i32>* %ptr, align 16
  %sel = select <4 x i1> %k, <4 x i32> %ld, <4 x i32> %old
  ret <4 x i32> %sel
}

; CHECK-LABEL: test_128_18
; CHECK: vmovdqu32{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x i32> @test_128_18(i8* %addr, <4 x i32> %old, <4 x i32> %mask1) {
  %k = icmp ne <4 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x i32>*
  %ld = load <4 x i32>, <4 x i32>* %ptr, align 1
  %sel = select <4 x i1> %k, <4 x i32> %ld, <4 x i32> %old
  ret <4 x i32> %sel
}

; CHECK-LABEL: test_128_19
; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x i32> @test_128_19(i8* %addr, <4 x i32> %mask1) {
  %k = icmp ne <4 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x i32>*
  %ld = load <4 x i32>, <4 x i32>* %ptr, align 16
  %sel = select <4 x i1> %k, <4 x i32> %ld, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}

; CHECK-LABEL: test_128_20
; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x i32> @test_128_20(i8* %addr, <4 x i32> %mask1) {
  %k = icmp ne <4 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x i32>*
  %ld = load <4 x i32>, <4 x i32>* %ptr, align 1
  %sel = select <4 x i1> %k, <4 x i32> %ld, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}

; CHECK-LABEL: test_128_21
; CHECK: vmovdqa64{{.*{%k[1-7]} }}
; CHECK: ret
define <2 x i64> @test_128_21(i8* %addr, <2 x i64> %old, <2 x i64> %mask1) {
  %k = icmp ne <2 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <2 x i64>*
  %ld = load <2 x i64>, <2 x i64>* %ptr, align 16
  %sel = select <2 x i1> %k, <2 x i64> %ld, <2 x i64> %old
  ret <2 x i64> %sel
}

; CHECK-LABEL: test_128_22
; CHECK: vmovdqu64{{.*{%k[1-7]} }}
; CHECK: ret
define <2 x i64> @test_128_22(i8* %addr, <2 x i64> %old, <2 x i64> %mask1) {
  %k = icmp ne <2 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <2 x i64>*
  %ld = load <2 x i64>, <2 x i64>* %ptr, align 1
  %sel = select <2 x i1> %k, <2 x i64> %ld, <2 x i64> %old
  ret <2 x i64> %sel
}

; CHECK-LABEL: test_128_23
; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <2 x i64> @test_128_23(i8* %addr, <2 x i64> %mask1) {
  %k = icmp ne <2 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <2 x i64>*
  %ld = load <2 x i64>, <2 x i64>* %ptr, align 16
  %sel = select <2 x i1> %k, <2 x i64> %ld, <2 x i64> zeroinitializer
  ret <2 x i64> %sel
}

; CHECK-LABEL: test_128_24
; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <2 x i64> @test_128_24(i8* %addr, <2 x i64> %mask1) {
  %k = icmp ne <2 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <2 x i64>*
  %ld = load <2 x i64>, <2 x i64>* %ptr, align 1
  %sel = select <2 x i1> %k, <2 x i64> %ld, <2 x i64> zeroinitializer
  ret <2 x i64> %sel
}

;===----------------------------------------------------------------------===;
; 128-bit vectors: masked floating-point loads
; All masks here come from "icmp ne" on an integer vector argument with the
; same lane count as the loaded floating-point vector.
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test_128_25
; CHECK: vmovaps{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x float> @test_128_25(i8* %addr, <4 x float> %old, <4 x i32> %mask1) {
  %k = icmp ne <4 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x float>*
  %ld = load <4 x float>, <4 x float>* %ptr, align 16
  %sel = select <4 x i1> %k, <4 x float> %ld, <4 x float> %old
  ret <4 x float> %sel
}

; CHECK-LABEL: test_128_26
; CHECK: vmovups{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x float> @test_128_26(i8* %addr, <4 x float> %old, <4 x i32> %mask1) {
  %k = icmp ne <4 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x float>*
  %ld = load <4 x float>, <4 x float>* %ptr, align 1
  %sel = select <4 x i1> %k, <4 x float> %ld, <4 x float> %old
  ret <4 x float> %sel
}

; CHECK-LABEL: test_128_27
; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x float> @test_128_27(i8* %addr, <4 x i32> %mask1) {
  %k = icmp ne <4 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x float>*
  %ld = load <4 x float>, <4 x float>* %ptr, align 16
  %sel = select <4 x i1> %k, <4 x float> %ld, <4 x float> zeroinitializer
  ret <4 x float> %sel
}

; CHECK-LABEL: test_128_28
; CHECK: vmovups{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x float> @test_128_28(i8* %addr, <4 x i32> %mask1) {
  %k = icmp ne <4 x i32> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <4 x float>*
  %ld = load <4 x float>, <4 x float>* %ptr, align 1
  %sel = select <4 x i1> %k, <4 x float> %ld, <4 x float> zeroinitializer
  ret <4 x float> %sel
}

; CHECK-LABEL: test_128_29
; CHECK: vmovapd{{.*{%k[1-7]} }}
; CHECK: ret
define <2 x double> @test_128_29(i8* %addr, <2 x double> %old, <2 x i64> %mask1) {
  %k = icmp ne <2 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <2 x double>*
  %ld = load <2 x double>, <2 x double>* %ptr, align 16
  %sel = select <2 x i1> %k, <2 x double> %ld, <2 x double> %old
  ret <2 x double> %sel
}

; CHECK-LABEL: test_128_30
; CHECK: vmovupd{{.*{%k[1-7]} }}
; CHECK: ret
define <2 x double> @test_128_30(i8* %addr, <2 x double> %old, <2 x i64> %mask1) {
  %k = icmp ne <2 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <2 x double>*
  %ld = load <2 x double>, <2 x double>* %ptr, align 1
  %sel = select <2 x i1> %k, <2 x double> %ld, <2 x double> %old
  ret <2 x double> %sel
}

; CHECK-LABEL: test_128_31
; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <2 x double> @test_128_31(i8* %addr, <2 x i64> %mask1) {
  %k = icmp ne <2 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <2 x double>*
  %ld = load <2 x double>, <2 x double>* %ptr, align 16
  %sel = select <2 x i1> %k, <2 x double> %ld, <2 x double> zeroinitializer
  ret <2 x double> %sel
}

; CHECK-LABEL: test_128_32
; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <2 x double> @test_128_32(i8* %addr, <2 x i64> %mask1) {
  %k = icmp ne <2 x i64> %mask1, zeroinitializer
  %ptr = bitcast i8* %addr to <2 x double>*
  %ld = load <2 x double>, <2 x double>* %ptr, align 1
  %sel = select <2 x i1> %k, <2 x double> %ld, <2 x double> zeroinitializer
  ret <2 x double> %sel
}