; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVXONLY
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.
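
; 128-bit integer loads with explicit 16-byte alignment. The aligned
; integer-domain form movdqa/vmovdqa is expected; on SKX the EVEX-encoded
; vmovdqa64 is selected instead of the VEX form.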
define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}
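
; The unaligned (align 4) counterparts are expected to use the unaligned
; integer-domain forms movdqu/vmovdqu, and vmovdqu64 on SKX.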
define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}
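
; 128-bit floating-point loads stay in the FP domain: movaps/movapd when
; aligned, movups/movupd when not. All AVX configurations print the same
; mnemonic here, so a single AVX prefix covers AVXONLY, KNL, and SKX.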
define <4 x float> @test_v4f32(<4 x float>* %V) {
; SSE-LABEL: test_v4f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}
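
; Loads with no explicit alignment default to the ABI alignment of the
; vector type, which is 16 bytes for these 128-bit vectors, so the aligned
; instruction forms are expected.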
define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}
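
; 256-bit loads. SSE targets must split the access into two 128-bit loads
; (note the FP-domain movaps/movups halves even for integer vectors), while
; AVX targets use a single ymm load, with the EVEX forms on SKX.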
define <32 x i8> @test_v32i8(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 32
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 32
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i32:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i32:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 32
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i64:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i64:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 32
  ret <4 x i64> %0
}

define <32 x i8> @test_v32i8_unaligned(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 4
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16_unaligned(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 4
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32_unaligned(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i32_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i32_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 4
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64_unaligned(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i64_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i64_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 4
  ret <4 x i64> %0
}

define <8 x float> @test_v8f32(<8 x float>* %V) {
; SSE-LABEL: test_v8f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 32
  ret <8 x float> %0
}

define <4 x double> @test_v4f64(<4 x double>* %V) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 32
  ret <4 x double> %0
}

define <8 x float> @test_v8f32_unaligned(<8 x float>* %V) {
; SSE-LABEL: test_v8f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 4
  ret <8 x float> %0
}

define <4 x double> @test_v4f64_unaligned(<4 x double>* %V) {
; SSE-LABEL: test_v4f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 4
  ret <4 x double> %0
}
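
; 512-bit integer loads. SSE splits into four 128-bit loads and AVX-only
; targets into two 256-bit loads. KNL also splits the i8/i16 cases, since
; it lacks AVX512BW, but handles i32/i64 with a single zmm load like SKX.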
define <64 x i8> @test_v64i8(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v64i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovaps (%rdi), %ymm0
; KNL-NEXT:    vmovaps 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v64i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 64
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovaps (%rdi), %ymm0
; KNL-NEXT:    vmovaps 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 64
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 64
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 64
  ret <8 x i64> %0
}

define <64 x i8> @test_v64i8_unaligned(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v64i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovups (%rdi), %ymm0
; KNL-NEXT:    vmovups 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v64i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 4
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16_unaligned(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovups (%rdi), %ymm0
; KNL-NEXT:    vmovups 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 4
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32_unaligned(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 4
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64_unaligned(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 4
  ret <8 x i64> %0
}
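
; 512-bit floating-point loads follow the same split pattern in the FP
; domain: four xmm loads on SSE, two ymm loads on AVX-only targets, and a
; single zmm load on AVX512.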
define <16 x float> @test_v16f32(<16 x float>* %V) {
; SSE-LABEL: test_v16f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16f32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16f32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovaps (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x float>, <16 x float>* %V, align 64
  ret <16 x float> %0
}

define <8 x double> @test_v8f64(<8 x double>* %V) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    movapd 32(%rdi), %xmm2
; SSE-NEXT:    movapd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovapd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovapd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovapd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 64
  ret <8 x double> %0
}

define <16 x float> @test_v16f32_unaligned(<16 x float>* %V) {
; SSE-LABEL: test_v16f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16f32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovups (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x float>, <16 x float>* %V, align 4
  ret <16 x float> %0
}

define <8 x double> @test_v8f64_unaligned(<8 x double>* %V) {
; SSE-LABEL: test_v8f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    movupd 32(%rdi), %xmm2
; SSE-NEXT:    movupd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovupd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovupd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovupd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 4
  ret <8 x double> %0
}