; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVXONLY
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.

     10 define <16 x i8> @test_v16i8(<16 x i8>* %V) {
     11 ; SSE-LABEL: test_v16i8:
     12 ; SSE:       # %bb.0: # %entry
     13 ; SSE-NEXT:    movdqa (%rdi), %xmm0
     14 ; SSE-NEXT:    retq
     15 ;
     16 ; AVXONLY-LABEL: test_v16i8:
     17 ; AVXONLY:       # %bb.0: # %entry
     18 ; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
     19 ; AVXONLY-NEXT:    retq
     20 ;
     21 ; KNL-LABEL: test_v16i8:
     22 ; KNL:       # %bb.0: # %entry
     23 ; KNL-NEXT:    vmovdqa (%rdi), %xmm0
     24 ; KNL-NEXT:    retq
     25 ;
     26 ; SKX-LABEL: test_v16i8:
     27 ; SKX:       # %bb.0: # %entry
     28 ; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
     29 ; SKX-NEXT:    retq
     30 entry:
     31   %0 = load <16 x i8>, <16 x i8>* %V, align 16
     32   ret <16 x i8> %0
     33 }
     34 
     35 define <8 x i16> @test_v8i16(<8 x i16>* %V) {
     36 ; SSE-LABEL: test_v8i16:
     37 ; SSE:       # %bb.0: # %entry
     38 ; SSE-NEXT:    movdqa (%rdi), %xmm0
     39 ; SSE-NEXT:    retq
     40 ;
     41 ; AVXONLY-LABEL: test_v8i16:
     42 ; AVXONLY:       # %bb.0: # %entry
     43 ; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
     44 ; AVXONLY-NEXT:    retq
     45 ;
     46 ; KNL-LABEL: test_v8i16:
     47 ; KNL:       # %bb.0: # %entry
     48 ; KNL-NEXT:    vmovdqa (%rdi), %xmm0
     49 ; KNL-NEXT:    retq
     50 ;
     51 ; SKX-LABEL: test_v8i16:
     52 ; SKX:       # %bb.0: # %entry
     53 ; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
     54 ; SKX-NEXT:    retq
     55 entry:
     56   %0 = load <8 x i16>, <8 x i16>* %V, align 16
     57   ret <8 x i16> %0
     58 }
     59 
     60 define <4 x i32> @test_v4i32(<4 x i32>* %V) {
     61 ; SSE-LABEL: test_v4i32:
     62 ; SSE:       # %bb.0: # %entry
     63 ; SSE-NEXT:    movdqa (%rdi), %xmm0
     64 ; SSE-NEXT:    retq
     65 ;
     66 ; AVXONLY-LABEL: test_v4i32:
     67 ; AVXONLY:       # %bb.0: # %entry
     68 ; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
     69 ; AVXONLY-NEXT:    retq
     70 ;
     71 ; KNL-LABEL: test_v4i32:
     72 ; KNL:       # %bb.0: # %entry
     73 ; KNL-NEXT:    vmovdqa (%rdi), %xmm0
     74 ; KNL-NEXT:    retq
     75 ;
     76 ; SKX-LABEL: test_v4i32:
     77 ; SKX:       # %bb.0: # %entry
     78 ; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
     79 ; SKX-NEXT:    retq
     80 entry:
     81   %0 = load <4 x i32>, <4 x i32>* %V, align 16
     82   ret <4 x i32> %0
     83 }
     84 
     85 define <2 x i64> @test_v2i64(<2 x i64>* %V) {
     86 ; SSE-LABEL: test_v2i64:
     87 ; SSE:       # %bb.0: # %entry
     88 ; SSE-NEXT:    movdqa (%rdi), %xmm0
     89 ; SSE-NEXT:    retq
     90 ;
     91 ; AVXONLY-LABEL: test_v2i64:
     92 ; AVXONLY:       # %bb.0: # %entry
     93 ; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
     94 ; AVXONLY-NEXT:    retq
     95 ;
     96 ; KNL-LABEL: test_v2i64:
     97 ; KNL:       # %bb.0: # %entry
     98 ; KNL-NEXT:    vmovdqa (%rdi), %xmm0
     99 ; KNL-NEXT:    retq
    100 ;
    101 ; SKX-LABEL: test_v2i64:
    102 ; SKX:       # %bb.0: # %entry
    103 ; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
    104 ; SKX-NEXT:    retq
    105 entry:
    106   %0 = load <2 x i64>, <2 x i64>* %V, align 16
    107   ret <2 x i64> %0
    108 }
    109 
    110 define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
    111 ; SSE-LABEL: test_v16i8_unaligned:
    112 ; SSE:       # %bb.0: # %entry
    113 ; SSE-NEXT:    movdqu (%rdi), %xmm0
    114 ; SSE-NEXT:    retq
    115 ;
    116 ; AVXONLY-LABEL: test_v16i8_unaligned:
    117 ; AVXONLY:       # %bb.0: # %entry
    118 ; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
    119 ; AVXONLY-NEXT:    retq
    120 ;
    121 ; KNL-LABEL: test_v16i8_unaligned:
    122 ; KNL:       # %bb.0: # %entry
    123 ; KNL-NEXT:    vmovdqu (%rdi), %xmm0
    124 ; KNL-NEXT:    retq
    125 ;
    126 ; SKX-LABEL: test_v16i8_unaligned:
    127 ; SKX:       # %bb.0: # %entry
    128 ; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
    129 ; SKX-NEXT:    retq
    130 entry:
    131   %0 = load <16 x i8>, <16 x i8>* %V, align 4
    132   ret <16 x i8> %0
    133 }
    134 
    135 define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
    136 ; SSE-LABEL: test_v8i16_unaligned:
    137 ; SSE:       # %bb.0: # %entry
    138 ; SSE-NEXT:    movdqu (%rdi), %xmm0
    139 ; SSE-NEXT:    retq
    140 ;
    141 ; AVXONLY-LABEL: test_v8i16_unaligned:
    142 ; AVXONLY:       # %bb.0: # %entry
    143 ; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
    144 ; AVXONLY-NEXT:    retq
    145 ;
    146 ; KNL-LABEL: test_v8i16_unaligned:
    147 ; KNL:       # %bb.0: # %entry
    148 ; KNL-NEXT:    vmovdqu (%rdi), %xmm0
    149 ; KNL-NEXT:    retq
    150 ;
    151 ; SKX-LABEL: test_v8i16_unaligned:
    152 ; SKX:       # %bb.0: # %entry
    153 ; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
    154 ; SKX-NEXT:    retq
    155 entry:
    156   %0 = load <8 x i16>, <8 x i16>* %V, align 4
    157   ret <8 x i16> %0
    158 }
    159 
    160 define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
    161 ; SSE-LABEL: test_v4i32_unaligned:
    162 ; SSE:       # %bb.0: # %entry
    163 ; SSE-NEXT:    movdqu (%rdi), %xmm0
    164 ; SSE-NEXT:    retq
    165 ;
    166 ; AVXONLY-LABEL: test_v4i32_unaligned:
    167 ; AVXONLY:       # %bb.0: # %entry
    168 ; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
    169 ; AVXONLY-NEXT:    retq
    170 ;
    171 ; KNL-LABEL: test_v4i32_unaligned:
    172 ; KNL:       # %bb.0: # %entry
    173 ; KNL-NEXT:    vmovdqu (%rdi), %xmm0
    174 ; KNL-NEXT:    retq
    175 ;
    176 ; SKX-LABEL: test_v4i32_unaligned:
    177 ; SKX:       # %bb.0: # %entry
    178 ; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
    179 ; SKX-NEXT:    retq
    180 entry:
    181   %0 = load <4 x i32>, <4 x i32>* %V, align 4
    182   ret <4 x i32> %0
    183 }
    184 
    185 define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
    186 ; SSE-LABEL: test_v2i64_unaligned:
    187 ; SSE:       # %bb.0: # %entry
    188 ; SSE-NEXT:    movdqu (%rdi), %xmm0
    189 ; SSE-NEXT:    retq
    190 ;
    191 ; AVXONLY-LABEL: test_v2i64_unaligned:
    192 ; AVXONLY:       # %bb.0: # %entry
    193 ; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
    194 ; AVXONLY-NEXT:    retq
    195 ;
    196 ; KNL-LABEL: test_v2i64_unaligned:
    197 ; KNL:       # %bb.0: # %entry
    198 ; KNL-NEXT:    vmovdqu (%rdi), %xmm0
    199 ; KNL-NEXT:    retq
    200 ;
    201 ; SKX-LABEL: test_v2i64_unaligned:
    202 ; SKX:       # %bb.0: # %entry
    203 ; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
    204 ; SKX-NEXT:    retq
    205 entry:
    206   %0 = load <2 x i64>, <2 x i64>* %V, align 4
    207   ret <2 x i64> %0
    208 }
    209 
    210 define <4 x float> @test_v4f32(<4 x float>* %V) {
    211 ; SSE-LABEL: test_v4f32:
    212 ; SSE:       # %bb.0: # %entry
    213 ; SSE-NEXT:    movaps (%rdi), %xmm0
    214 ; SSE-NEXT:    retq
    215 ;
    216 ; AVX-LABEL: test_v4f32:
    217 ; AVX:       # %bb.0: # %entry
    218 ; AVX-NEXT:    vmovaps (%rdi), %xmm0
    219 ; AVX-NEXT:    retq
    220 entry:
    221   %0 = load <4 x float>, <4 x float>* %V, align 16
    222   ret <4 x float> %0
    223 }
    224 
    225 define <2 x double> @test_v2f64(<2 x double>* %V) {
    226 ; SSE-LABEL: test_v2f64:
    227 ; SSE:       # %bb.0: # %entry
    228 ; SSE-NEXT:    movapd (%rdi), %xmm0
    229 ; SSE-NEXT:    retq
    230 ;
    231 ; AVX-LABEL: test_v2f64:
    232 ; AVX:       # %bb.0: # %entry
    233 ; AVX-NEXT:    vmovapd (%rdi), %xmm0
    234 ; AVX-NEXT:    retq
    235 entry:
    236   %0 = load <2 x double>, <2 x double>* %V, align 16
    237   ret <2 x double> %0
    238 }
    239 
    240 define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
    241 ; SSE-LABEL: test_v4f32_unaligned:
    242 ; SSE:       # %bb.0: # %entry
    243 ; SSE-NEXT:    movups (%rdi), %xmm0
    244 ; SSE-NEXT:    retq
    245 ;
    246 ; AVX-LABEL: test_v4f32_unaligned:
    247 ; AVX:       # %bb.0: # %entry
    248 ; AVX-NEXT:    vmovups (%rdi), %xmm0
    249 ; AVX-NEXT:    retq
    250 entry:
    251   %0 = load <4 x float>, <4 x float>* %V, align 4
    252   ret <4 x float> %0
    253 }
    254 
    255 define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
    256 ; SSE-LABEL: test_v2f64_unaligned:
    257 ; SSE:       # %bb.0: # %entry
    258 ; SSE-NEXT:    movupd (%rdi), %xmm0
    259 ; SSE-NEXT:    retq
    260 ;
    261 ; AVX-LABEL: test_v2f64_unaligned:
    262 ; AVX:       # %bb.0: # %entry
    263 ; AVX-NEXT:    vmovupd (%rdi), %xmm0
    264 ; AVX-NEXT:    retq
    265 entry:
    266   %0 = load <2 x double>, <2 x double>* %V, align 4
    267   ret <2 x double> %0
    268 }
    269 
    270 define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
    271 ; SSE-LABEL: test_v16i8_abi_alignment:
    272 ; SSE:       # %bb.0: # %entry
    273 ; SSE-NEXT:    movdqa (%rdi), %xmm0
    274 ; SSE-NEXT:    retq
    275 ;
    276 ; AVXONLY-LABEL: test_v16i8_abi_alignment:
    277 ; AVXONLY:       # %bb.0: # %entry
    278 ; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
    279 ; AVXONLY-NEXT:    retq
    280 ;
    281 ; KNL-LABEL: test_v16i8_abi_alignment:
    282 ; KNL:       # %bb.0: # %entry
    283 ; KNL-NEXT:    vmovdqa (%rdi), %xmm0
    284 ; KNL-NEXT:    retq
    285 ;
    286 ; SKX-LABEL: test_v16i8_abi_alignment:
    287 ; SKX:       # %bb.0: # %entry
    288 ; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
    289 ; SKX-NEXT:    retq
    290 entry:
    291   %0 = load <16 x i8>, <16 x i8>* %V
    292   ret <16 x i8> %0
    293 }
    294 
    295 define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
    296 ; SSE-LABEL: test_v8i16_abi_alignment:
    297 ; SSE:       # %bb.0: # %entry
    298 ; SSE-NEXT:    movdqa (%rdi), %xmm0
    299 ; SSE-NEXT:    retq
    300 ;
    301 ; AVXONLY-LABEL: test_v8i16_abi_alignment:
    302 ; AVXONLY:       # %bb.0: # %entry
    303 ; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
    304 ; AVXONLY-NEXT:    retq
    305 ;
    306 ; KNL-LABEL: test_v8i16_abi_alignment:
    307 ; KNL:       # %bb.0: # %entry
    308 ; KNL-NEXT:    vmovdqa (%rdi), %xmm0
    309 ; KNL-NEXT:    retq
    310 ;
    311 ; SKX-LABEL: test_v8i16_abi_alignment:
    312 ; SKX:       # %bb.0: # %entry
    313 ; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
    314 ; SKX-NEXT:    retq
    315 entry:
    316   %0 = load <8 x i16>, <8 x i16>* %V
    317   ret <8 x i16> %0
    318 }
    319 
    320 define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
    321 ; SSE-LABEL: test_v4i32_abi_alignment:
    322 ; SSE:       # %bb.0: # %entry
    323 ; SSE-NEXT:    movdqa (%rdi), %xmm0
    324 ; SSE-NEXT:    retq
    325 ;
    326 ; AVXONLY-LABEL: test_v4i32_abi_alignment:
    327 ; AVXONLY:       # %bb.0: # %entry
    328 ; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
    329 ; AVXONLY-NEXT:    retq
    330 ;
    331 ; KNL-LABEL: test_v4i32_abi_alignment:
    332 ; KNL:       # %bb.0: # %entry
    333 ; KNL-NEXT:    vmovdqa (%rdi), %xmm0
    334 ; KNL-NEXT:    retq
    335 ;
    336 ; SKX-LABEL: test_v4i32_abi_alignment:
    337 ; SKX:       # %bb.0: # %entry
    338 ; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
    339 ; SKX-NEXT:    retq
    340 entry:
    341   %0 = load <4 x i32>, <4 x i32>* %V
    342   ret <4 x i32> %0
    343 }
    344 
    345 define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
    346 ; SSE-LABEL: test_v2i64_abi_alignment:
    347 ; SSE:       # %bb.0: # %entry
    348 ; SSE-NEXT:    movdqa (%rdi), %xmm0
    349 ; SSE-NEXT:    retq
    350 ;
    351 ; AVXONLY-LABEL: test_v2i64_abi_alignment:
    352 ; AVXONLY:       # %bb.0: # %entry
    353 ; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
    354 ; AVXONLY-NEXT:    retq
    355 ;
    356 ; KNL-LABEL: test_v2i64_abi_alignment:
    357 ; KNL:       # %bb.0: # %entry
    358 ; KNL-NEXT:    vmovdqa (%rdi), %xmm0
    359 ; KNL-NEXT:    retq
    360 ;
    361 ; SKX-LABEL: test_v2i64_abi_alignment:
    362 ; SKX:       # %bb.0: # %entry
    363 ; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
    364 ; SKX-NEXT:    retq
    365 entry:
    366   %0 = load <2 x i64>, <2 x i64>* %V
    367   ret <2 x i64> %0
    368 }
    369 
    370 define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
    371 ; SSE-LABEL: test_v4f32_abi_alignment:
    372 ; SSE:       # %bb.0: # %entry
    373 ; SSE-NEXT:    movaps (%rdi), %xmm0
    374 ; SSE-NEXT:    retq
    375 ;
    376 ; AVX-LABEL: test_v4f32_abi_alignment:
    377 ; AVX:       # %bb.0: # %entry
    378 ; AVX-NEXT:    vmovaps (%rdi), %xmm0
    379 ; AVX-NEXT:    retq
    380 entry:
    381   %0 = load <4 x float>, <4 x float>* %V
    382   ret <4 x float> %0
    383 }
    384 
    385 define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
    386 ; SSE-LABEL: test_v2f64_abi_alignment:
    387 ; SSE:       # %bb.0: # %entry
    388 ; SSE-NEXT:    movapd (%rdi), %xmm0
    389 ; SSE-NEXT:    retq
    390 ;
    391 ; AVX-LABEL: test_v2f64_abi_alignment:
    392 ; AVX:       # %bb.0: # %entry
    393 ; AVX-NEXT:    vmovapd (%rdi), %xmm0
    394 ; AVX-NEXT:    retq
    395 entry:
    396   %0 = load <2 x double>, <2 x double>* %V
    397   ret <2 x double> %0
    398 }
    399 
    400 define <32 x i8> @test_v32i8(<32 x i8>* %V) {
    401 ; SSE-LABEL: test_v32i8:
    402 ; SSE:       # %bb.0: # %entry
    403 ; SSE-NEXT:    movaps (%rdi), %xmm0
    404 ; SSE-NEXT:    movaps 16(%rdi), %xmm1
    405 ; SSE-NEXT:    retq
    406 ;
    407 ; AVXONLY-LABEL: test_v32i8:
    408 ; AVXONLY:       # %bb.0: # %entry
    409 ; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
    410 ; AVXONLY-NEXT:    retq
    411 ;
    412 ; KNL-LABEL: test_v32i8:
    413 ; KNL:       # %bb.0: # %entry
    414 ; KNL-NEXT:    vmovdqa (%rdi), %ymm0
    415 ; KNL-NEXT:    retq
    416 ;
    417 ; SKX-LABEL: test_v32i8:
    418 ; SKX:       # %bb.0: # %entry
    419 ; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
    420 ; SKX-NEXT:    retq
    421 entry:
    422   %0 = load <32 x i8>, <32 x i8>* %V, align 32
    423   ret <32 x i8> %0
    424 }
    425 
    426 define <16 x i16> @test_v16i16(<16 x i16>* %V) {
    427 ; SSE-LABEL: test_v16i16:
    428 ; SSE:       # %bb.0: # %entry
    429 ; SSE-NEXT:    movaps (%rdi), %xmm0
    430 ; SSE-NEXT:    movaps 16(%rdi), %xmm1
    431 ; SSE-NEXT:    retq
    432 ;
    433 ; AVXONLY-LABEL: test_v16i16:
    434 ; AVXONLY:       # %bb.0: # %entry
    435 ; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
    436 ; AVXONLY-NEXT:    retq
    437 ;
    438 ; KNL-LABEL: test_v16i16:
    439 ; KNL:       # %bb.0: # %entry
    440 ; KNL-NEXT:    vmovdqa (%rdi), %ymm0
    441 ; KNL-NEXT:    retq
    442 ;
    443 ; SKX-LABEL: test_v16i16:
    444 ; SKX:       # %bb.0: # %entry
    445 ; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
    446 ; SKX-NEXT:    retq
    447 entry:
    448   %0 = load <16 x i16>, <16 x i16>* %V, align 32
    449   ret <16 x i16> %0
    450 }
    451 
    452 define <8 x i32> @test_v8i32(<8 x i32>* %V) {
    453 ; SSE-LABEL: test_v8i32:
    454 ; SSE:       # %bb.0: # %entry
    455 ; SSE-NEXT:    movaps (%rdi), %xmm0
    456 ; SSE-NEXT:    movaps 16(%rdi), %xmm1
    457 ; SSE-NEXT:    retq
    458 ;
    459 ; AVXONLY-LABEL: test_v8i32:
    460 ; AVXONLY:       # %bb.0: # %entry
    461 ; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
    462 ; AVXONLY-NEXT:    retq
    463 ;
    464 ; KNL-LABEL: test_v8i32:
    465 ; KNL:       # %bb.0: # %entry
    466 ; KNL-NEXT:    vmovdqa (%rdi), %ymm0
    467 ; KNL-NEXT:    retq
    468 ;
    469 ; SKX-LABEL: test_v8i32:
    470 ; SKX:       # %bb.0: # %entry
    471 ; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
    472 ; SKX-NEXT:    retq
    473 entry:
    474   %0 = load <8 x i32>, <8 x i32>* %V, align 32
    475   ret <8 x i32> %0
    476 }
    477 
    478 define <4 x i64> @test_v4i64(<4 x i64>* %V) {
    479 ; SSE-LABEL: test_v4i64:
    480 ; SSE:       # %bb.0: # %entry
    481 ; SSE-NEXT:    movaps (%rdi), %xmm0
    482 ; SSE-NEXT:    movaps 16(%rdi), %xmm1
    483 ; SSE-NEXT:    retq
    484 ;
    485 ; AVXONLY-LABEL: test_v4i64:
    486 ; AVXONLY:       # %bb.0: # %entry
    487 ; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
    488 ; AVXONLY-NEXT:    retq
    489 ;
    490 ; KNL-LABEL: test_v4i64:
    491 ; KNL:       # %bb.0: # %entry
    492 ; KNL-NEXT:    vmovdqa (%rdi), %ymm0
    493 ; KNL-NEXT:    retq
    494 ;
    495 ; SKX-LABEL: test_v4i64:
    496 ; SKX:       # %bb.0: # %entry
    497 ; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
    498 ; SKX-NEXT:    retq
    499 entry:
    500   %0 = load <4 x i64>, <4 x i64>* %V, align 32
    501   ret <4 x i64> %0
    502 }
    503 
    504 define <32 x i8> @test_v32i8_unaligned(<32 x i8>* %V) {
    505 ; SSE-LABEL: test_v32i8_unaligned:
    506 ; SSE:       # %bb.0: # %entry
    507 ; SSE-NEXT:    movups (%rdi), %xmm0
    508 ; SSE-NEXT:    movups 16(%rdi), %xmm1
    509 ; SSE-NEXT:    retq
    510 ;
    511 ; AVXONLY-LABEL: test_v32i8_unaligned:
    512 ; AVXONLY:       # %bb.0: # %entry
    513 ; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
    514 ; AVXONLY-NEXT:    retq
    515 ;
    516 ; KNL-LABEL: test_v32i8_unaligned:
    517 ; KNL:       # %bb.0: # %entry
    518 ; KNL-NEXT:    vmovdqu (%rdi), %ymm0
    519 ; KNL-NEXT:    retq
    520 ;
    521 ; SKX-LABEL: test_v32i8_unaligned:
    522 ; SKX:       # %bb.0: # %entry
    523 ; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
    524 ; SKX-NEXT:    retq
    525 entry:
    526   %0 = load <32 x i8>, <32 x i8>* %V, align 4
    527   ret <32 x i8> %0
    528 }
    529 
    530 define <16 x i16> @test_v16i16_unaligned(<16 x i16>* %V) {
    531 ; SSE-LABEL: test_v16i16_unaligned:
    532 ; SSE:       # %bb.0: # %entry
    533 ; SSE-NEXT:    movups (%rdi), %xmm0
    534 ; SSE-NEXT:    movups 16(%rdi), %xmm1
    535 ; SSE-NEXT:    retq
    536 ;
    537 ; AVXONLY-LABEL: test_v16i16_unaligned:
    538 ; AVXONLY:       # %bb.0: # %entry
    539 ; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
    540 ; AVXONLY-NEXT:    retq
    541 ;
    542 ; KNL-LABEL: test_v16i16_unaligned:
    543 ; KNL:       # %bb.0: # %entry
    544 ; KNL-NEXT:    vmovdqu (%rdi), %ymm0
    545 ; KNL-NEXT:    retq
    546 ;
    547 ; SKX-LABEL: test_v16i16_unaligned:
    548 ; SKX:       # %bb.0: # %entry
    549 ; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
    550 ; SKX-NEXT:    retq
    551 entry:
    552   %0 = load <16 x i16>, <16 x i16>* %V, align 4
    553   ret <16 x i16> %0
    554 }
    555 
    556 define <8 x i32> @test_v8i32_unaligned(<8 x i32>* %V) {
    557 ; SSE-LABEL: test_v8i32_unaligned:
    558 ; SSE:       # %bb.0: # %entry
    559 ; SSE-NEXT:    movups (%rdi), %xmm0
    560 ; SSE-NEXT:    movups 16(%rdi), %xmm1
    561 ; SSE-NEXT:    retq
    562 ;
    563 ; AVXONLY-LABEL: test_v8i32_unaligned:
    564 ; AVXONLY:       # %bb.0: # %entry
    565 ; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
    566 ; AVXONLY-NEXT:    retq
    567 ;
    568 ; KNL-LABEL: test_v8i32_unaligned:
    569 ; KNL:       # %bb.0: # %entry
    570 ; KNL-NEXT:    vmovdqu (%rdi), %ymm0
    571 ; KNL-NEXT:    retq
    572 ;
    573 ; SKX-LABEL: test_v8i32_unaligned:
    574 ; SKX:       # %bb.0: # %entry
    575 ; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
    576 ; SKX-NEXT:    retq
    577 entry:
    578   %0 = load <8 x i32>, <8 x i32>* %V, align 4
    579   ret <8 x i32> %0
    580 }
    581 
    582 define <4 x i64> @test_v4i64_unaligned(<4 x i64>* %V) {
    583 ; SSE-LABEL: test_v4i64_unaligned:
    584 ; SSE:       # %bb.0: # %entry
    585 ; SSE-NEXT:    movups (%rdi), %xmm0
    586 ; SSE-NEXT:    movups 16(%rdi), %xmm1
    587 ; SSE-NEXT:    retq
    588 ;
    589 ; AVXONLY-LABEL: test_v4i64_unaligned:
    590 ; AVXONLY:       # %bb.0: # %entry
    591 ; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
    592 ; AVXONLY-NEXT:    retq
    593 ;
    594 ; KNL-LABEL: test_v4i64_unaligned:
    595 ; KNL:       # %bb.0: # %entry
    596 ; KNL-NEXT:    vmovdqu (%rdi), %ymm0
    597 ; KNL-NEXT:    retq
    598 ;
    599 ; SKX-LABEL: test_v4i64_unaligned:
    600 ; SKX:       # %bb.0: # %entry
    601 ; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
    602 ; SKX-NEXT:    retq
    603 entry:
    604   %0 = load <4 x i64>, <4 x i64>* %V, align 4
    605   ret <4 x i64> %0
    606 }
    607 
    608 define <8 x float> @test_v8f32(<8 x float>* %V) {
    609 ; SSE-LABEL: test_v8f32:
    610 ; SSE:       # %bb.0: # %entry
    611 ; SSE-NEXT:    movaps (%rdi), %xmm0
    612 ; SSE-NEXT:    movaps 16(%rdi), %xmm1
    613 ; SSE-NEXT:    retq
    614 ;
    615 ; AVX-LABEL: test_v8f32:
    616 ; AVX:       # %bb.0: # %entry
    617 ; AVX-NEXT:    vmovaps (%rdi), %ymm0
    618 ; AVX-NEXT:    retq
    619 entry:
    620   %0 = load <8 x float>, <8 x float>* %V, align 32
    621   ret <8 x float> %0
    622 }
    623 
    624 define <4 x double> @test_v4f64(<4 x double>* %V) {
    625 ; SSE-LABEL: test_v4f64:
    626 ; SSE:       # %bb.0: # %entry
    627 ; SSE-NEXT:    movapd (%rdi), %xmm0
    628 ; SSE-NEXT:    movapd 16(%rdi), %xmm1
    629 ; SSE-NEXT:    retq
    630 ;
    631 ; AVX-LABEL: test_v4f64:
    632 ; AVX:       # %bb.0: # %entry
    633 ; AVX-NEXT:    vmovapd (%rdi), %ymm0
    634 ; AVX-NEXT:    retq
    635 entry:
    636   %0 = load <4 x double>, <4 x double>* %V, align 32
    637   ret <4 x double> %0
    638 }
    639 
    640 define <8 x float> @test_v8f32_unaligned(<8 x float>* %V) {
    641 ; SSE-LABEL: test_v8f32_unaligned:
    642 ; SSE:       # %bb.0: # %entry
    643 ; SSE-NEXT:    movups (%rdi), %xmm0
    644 ; SSE-NEXT:    movups 16(%rdi), %xmm1
    645 ; SSE-NEXT:    retq
    646 ;
    647 ; AVX-LABEL: test_v8f32_unaligned:
    648 ; AVX:       # %bb.0: # %entry
    649 ; AVX-NEXT:    vmovups (%rdi), %ymm0
    650 ; AVX-NEXT:    retq
    651 entry:
    652   %0 = load <8 x float>, <8 x float>* %V, align 4
    653   ret <8 x float> %0
    654 }
    655 
    656 define <4 x double> @test_v4f64_unaligned(<4 x double>* %V) {
    657 ; SSE-LABEL: test_v4f64_unaligned:
    658 ; SSE:       # %bb.0: # %entry
    659 ; SSE-NEXT:    movupd (%rdi), %xmm0
    660 ; SSE-NEXT:    movupd 16(%rdi), %xmm1
    661 ; SSE-NEXT:    retq
    662 ;
    663 ; AVX-LABEL: test_v4f64_unaligned:
    664 ; AVX:       # %bb.0: # %entry
    665 ; AVX-NEXT:    vmovupd (%rdi), %ymm0
    666 ; AVX-NEXT:    retq
    667 entry:
    668   %0 = load <4 x double>, <4 x double>* %V, align 4
    669   ret <4 x double> %0
    670 }
    671 
    672 define <64 x i8> @test_v64i8(<64 x i8>* %V) {
    673 ; SSE-LABEL: test_v64i8:
    674 ; SSE:       # %bb.0: # %entry
    675 ; SSE-NEXT:    movaps (%rdi), %xmm0
    676 ; SSE-NEXT:    movaps 16(%rdi), %xmm1
    677 ; SSE-NEXT:    movaps 32(%rdi), %xmm2
    678 ; SSE-NEXT:    movaps 48(%rdi), %xmm3
    679 ; SSE-NEXT:    retq
    680 ;
    681 ; AVXONLY-LABEL: test_v64i8:
    682 ; AVXONLY:       # %bb.0: # %entry
    683 ; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
    684 ; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
    685 ; AVXONLY-NEXT:    retq
    686 ;
    687 ; KNL-LABEL: test_v64i8:
    688 ; KNL:       # %bb.0: # %entry
    689 ; KNL-NEXT:    vmovaps (%rdi), %ymm0
    690 ; KNL-NEXT:    vmovaps 32(%rdi), %ymm1
    691 ; KNL-NEXT:    retq
    692 ;
    693 ; SKX-LABEL: test_v64i8:
    694 ; SKX:       # %bb.0: # %entry
    695 ; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0
    696 ; SKX-NEXT:    retq
    697 entry:
    698   %0 = load <64 x i8>, <64 x i8>* %V, align 64
    699   ret <64 x i8> %0
    700 }
    701 
    702 define <32 x i16> @test_v32i16(<32 x i16>* %V) {
    703 ; SSE-LABEL: test_v32i16:
    704 ; SSE:       # %bb.0: # %entry
    705 ; SSE-NEXT:    movaps (%rdi), %xmm0
    706 ; SSE-NEXT:    movaps 16(%rdi), %xmm1
    707 ; SSE-NEXT:    movaps 32(%rdi), %xmm2
    708 ; SSE-NEXT:    movaps 48(%rdi), %xmm3
    709 ; SSE-NEXT:    retq
    710 ;
    711 ; AVXONLY-LABEL: test_v32i16:
    712 ; AVXONLY:       # %bb.0: # %entry
    713 ; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
    714 ; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
    715 ; AVXONLY-NEXT:    retq
    716 ;
    717 ; KNL-LABEL: test_v32i16:
    718 ; KNL:       # %bb.0: # %entry
    719 ; KNL-NEXT:    vmovaps (%rdi), %ymm0
    720 ; KNL-NEXT:    vmovaps 32(%rdi), %ymm1
    721 ; KNL-NEXT:    retq
    722 ;
    723 ; SKX-LABEL: test_v32i16:
    724 ; SKX:       # %bb.0: # %entry
    725 ; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0
    726 ; SKX-NEXT:    retq
    727 entry:
    728   %0 = load <32 x i16>, <32 x i16>* %V, align 64
    729   ret <32 x i16> %0
    730 }
    731 
    732 define <16 x i32> @test_v16i32(<16 x i32>* %V) {
    733 ; SSE-LABEL: test_v16i32:
    734 ; SSE:       # %bb.0: # %entry
    735 ; SSE-NEXT:    movaps (%rdi), %xmm0
    736 ; SSE-NEXT:    movaps 16(%rdi), %xmm1
    737 ; SSE-NEXT:    movaps 32(%rdi), %xmm2
    738 ; SSE-NEXT:    movaps 48(%rdi), %xmm3
    739 ; SSE-NEXT:    retq
    740 ;
    741 ; AVXONLY-LABEL: test_v16i32:
    742 ; AVXONLY:       # %bb.0: # %entry
    743 ; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
    744 ; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
    745 ; AVXONLY-NEXT:    retq
    746 ;
    747 ; AVX512-LABEL: test_v16i32:
    748 ; AVX512:       # %bb.0: # %entry
    749 ; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
    750 ; AVX512-NEXT:    retq
    751 entry:
    752   %0 = load <16 x i32>, <16 x i32>* %V, align 64
    753   ret <16 x i32> %0
    754 }
    755 
    756 define <8 x i64> @test_v8i64(<8 x i64>* %V) {
    757 ; SSE-LABEL: test_v8i64:
    758 ; SSE:       # %bb.0: # %entry
    759 ; SSE-NEXT:    movaps (%rdi), %xmm0
    760 ; SSE-NEXT:    movaps 16(%rdi), %xmm1
    761 ; SSE-NEXT:    movaps 32(%rdi), %xmm2
    762 ; SSE-NEXT:    movaps 48(%rdi), %xmm3
    763 ; SSE-NEXT:    retq
    764 ;
    765 ; AVXONLY-LABEL: test_v8i64:
    766 ; AVXONLY:       # %bb.0: # %entry
    767 ; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
    768 ; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
    769 ; AVXONLY-NEXT:    retq
    770 ;
    771 ; AVX512-LABEL: test_v8i64:
    772 ; AVX512:       # %bb.0: # %entry
    773 ; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
    774 ; AVX512-NEXT:    retq
    775 entry:
    776   %0 = load <8 x i64>, <8 x i64>* %V, align 64
    777   ret <8 x i64> %0
    778 }
    779 
    780 define <64 x i8> @test_v64i8_unaligned(<64 x i8>* %V) {
    781 ; SSE-LABEL: test_v64i8_unaligned:
    782 ; SSE:       # %bb.0: # %entry
    783 ; SSE-NEXT:    movups (%rdi), %xmm0
    784 ; SSE-NEXT:    movups 16(%rdi), %xmm1
    785 ; SSE-NEXT:    movups 32(%rdi), %xmm2
    786 ; SSE-NEXT:    movups 48(%rdi), %xmm3
    787 ; SSE-NEXT:    retq
    788 ;
    789 ; AVXONLY-LABEL: test_v64i8_unaligned:
    790 ; AVXONLY:       # %bb.0: # %entry
    791 ; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
    792 ; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
    793 ; AVXONLY-NEXT:    retq
    794 ;
    795 ; KNL-LABEL: test_v64i8_unaligned:
    796 ; KNL:       # %bb.0: # %entry
    797 ; KNL-NEXT:    vmovups (%rdi), %ymm0
    798 ; KNL-NEXT:    vmovups 32(%rdi), %ymm1
    799 ; KNL-NEXT:    retq
    800 ;
    801 ; SKX-LABEL: test_v64i8_unaligned:
    802 ; SKX:       # %bb.0: # %entry
    803 ; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
    804 ; SKX-NEXT:    retq
    805 entry:
    806   %0 = load <64 x i8>, <64 x i8>* %V, align 4
    807   ret <64 x i8> %0
    808 }
    809 
    810 define <32 x i16> @test_v32i16_unaligned(<32 x i16>* %V) {
    811 ; SSE-LABEL: test_v32i16_unaligned:
    812 ; SSE:       # %bb.0: # %entry
    813 ; SSE-NEXT:    movups (%rdi), %xmm0
    814 ; SSE-NEXT:    movups 16(%rdi), %xmm1
    815 ; SSE-NEXT:    movups 32(%rdi), %xmm2
    816 ; SSE-NEXT:    movups 48(%rdi), %xmm3
    817 ; SSE-NEXT:    retq
    818 ;
    819 ; AVXONLY-LABEL: test_v32i16_unaligned:
    820 ; AVXONLY:       # %bb.0: # %entry
    821 ; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
    822 ; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
    823 ; AVXONLY-NEXT:    retq
    824 ;
    825 ; KNL-LABEL: test_v32i16_unaligned:
    826 ; KNL:       # %bb.0: # %entry
    827 ; KNL-NEXT:    vmovups (%rdi), %ymm0
    828 ; KNL-NEXT:    vmovups 32(%rdi), %ymm1
    829 ; KNL-NEXT:    retq
    830 ;
    831 ; SKX-LABEL: test_v32i16_unaligned:
    832 ; SKX:       # %bb.0: # %entry
    833 ; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
    834 ; SKX-NEXT:    retq
    835 entry:
    836   %0 = load <32 x i16>, <32 x i16>* %V, align 4
    837   ret <32 x i16> %0
    838 }
    839 
    840 define <16 x i32> @test_v16i32_unaligned(<16 x i32>* %V) {
    841 ; SSE-LABEL: test_v16i32_unaligned:
    842 ; SSE:       # %bb.0: # %entry
    843 ; SSE-NEXT:    movups (%rdi), %xmm0
    844 ; SSE-NEXT:    movups 16(%rdi), %xmm1
    845 ; SSE-NEXT:    movups 32(%rdi), %xmm2
    846 ; SSE-NEXT:    movups 48(%rdi), %xmm3
    847 ; SSE-NEXT:    retq
    848 ;
    849 ; AVXONLY-LABEL: test_v16i32_unaligned:
    850 ; AVXONLY:       # %bb.0: # %entry
    851 ; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
    852 ; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
    853 ; AVXONLY-NEXT:    retq
    854 ;
    855 ; AVX512-LABEL: test_v16i32_unaligned:
    856 ; AVX512:       # %bb.0: # %entry
    857 ; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
    858 ; AVX512-NEXT:    retq
    859 entry:
    860   %0 = load <16 x i32>, <16 x i32>* %V, align 4
    861   ret <16 x i32> %0
    862 }
    863 
    864 define <8 x i64> @test_v8i64_unaligned(<8 x i64>* %V) {
    865 ; SSE-LABEL: test_v8i64_unaligned:
    866 ; SSE:       # %bb.0: # %entry
    867 ; SSE-NEXT:    movups (%rdi), %xmm0
    868 ; SSE-NEXT:    movups 16(%rdi), %xmm1
    869 ; SSE-NEXT:    movups 32(%rdi), %xmm2
    870 ; SSE-NEXT:    movups 48(%rdi), %xmm3
    871 ; SSE-NEXT:    retq
    872 ;
    873 ; AVXONLY-LABEL: test_v8i64_unaligned:
    874 ; AVXONLY:       # %bb.0: # %entry
    875 ; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
    876 ; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
    877 ; AVXONLY-NEXT:    retq
    878 ;
    879 ; AVX512-LABEL: test_v8i64_unaligned:
    880 ; AVX512:       # %bb.0: # %entry
    881 ; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
    882 ; AVX512-NEXT:    retq
    883 entry:
    884   %0 = load <8 x i64>, <8 x i64>* %V, align 4
    885   ret <8 x i64> %0
    886 }
    887 
    888 define <8 x float> @test_v16f32(<8 x float>* %V) {
    889 ; SSE-LABEL: test_v16f32:
    890 ; SSE:       # %bb.0: # %entry
    891 ; SSE-NEXT:    movaps (%rdi), %xmm0
    892 ; SSE-NEXT:    movaps 16(%rdi), %xmm1
    893 ; SSE-NEXT:    retq
    894 ;
    895 ; AVX-LABEL: test_v16f32:
    896 ; AVX:       # %bb.0: # %entry
    897 ; AVX-NEXT:    vmovaps (%rdi), %ymm0
    898 ; AVX-NEXT:    retq
    899 entry:
    900   %0 = load <8 x float>, <8 x float>* %V, align 64
    901   ret <8 x float> %0
    902 }
    903 
    904 define <8 x double> @test_v8f64(<8 x double>* %V) {
    905 ; SSE-LABEL: test_v8f64:
    906 ; SSE:       # %bb.0: # %entry
    907 ; SSE-NEXT:    movapd (%rdi), %xmm0
    908 ; SSE-NEXT:    movapd 16(%rdi), %xmm1
    909 ; SSE-NEXT:    movapd 32(%rdi), %xmm2
    910 ; SSE-NEXT:    movapd 48(%rdi), %xmm3
    911 ; SSE-NEXT:    retq
    912 ;
    913 ; AVXONLY-LABEL: test_v8f64:
    914 ; AVXONLY:       # %bb.0: # %entry
    915 ; AVXONLY-NEXT:    vmovapd (%rdi), %ymm0
    916 ; AVXONLY-NEXT:    vmovapd 32(%rdi), %ymm1
    917 ; AVXONLY-NEXT:    retq
    918 ;
    919 ; AVX512-LABEL: test_v8f64:
    920 ; AVX512:       # %bb.0: # %entry
    921 ; AVX512-NEXT:    vmovapd (%rdi), %zmm0
    922 ; AVX512-NEXT:    retq
    923 entry:
    924   %0 = load <8 x double>, <8 x double>* %V, align 64
    925   ret <8 x double> %0
    926 }
    927 
    928 define <16 x float> @test_v16f32_unaligned(<16 x float>* %V) {
    929 ; SSE-LABEL: test_v16f32_unaligned:
    930 ; SSE:       # %bb.0: # %entry
    931 ; SSE-NEXT:    movups (%rdi), %xmm0
    932 ; SSE-NEXT:    movups 16(%rdi), %xmm1
    933 ; SSE-NEXT:    movups 32(%rdi), %xmm2
    934 ; SSE-NEXT:    movups 48(%rdi), %xmm3
    935 ; SSE-NEXT:    retq
    936 ;
    937 ; AVXONLY-LABEL: test_v16f32_unaligned:
    938 ; AVXONLY:       # %bb.0: # %entry
    939 ; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
    940 ; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
    941 ; AVXONLY-NEXT:    retq
    942 ;
    943 ; AVX512-LABEL: test_v16f32_unaligned:
    944 ; AVX512:       # %bb.0: # %entry
    945 ; AVX512-NEXT:    vmovups (%rdi), %zmm0
    946 ; AVX512-NEXT:    retq
    947 entry:
    948   %0 = load <16 x float>, <16 x float>* %V, align 4
    949   ret <16 x float> %0
    950 }
    951 
    952 define <8 x double> @test_v8f64_unaligned(<8 x double>* %V) {
    953 ; SSE-LABEL: test_v8f64_unaligned:
    954 ; SSE:       # %bb.0: # %entry
    955 ; SSE-NEXT:    movupd (%rdi), %xmm0
    956 ; SSE-NEXT:    movupd 16(%rdi), %xmm1
    957 ; SSE-NEXT:    movupd 32(%rdi), %xmm2
    958 ; SSE-NEXT:    movupd 48(%rdi), %xmm3
    959 ; SSE-NEXT:    retq
    960 ;
    961 ; AVXONLY-LABEL: test_v8f64_unaligned:
    962 ; AVXONLY:       # %bb.0: # %entry
    963 ; AVXONLY-NEXT:    vmovupd (%rdi), %ymm0
    964 ; AVXONLY-NEXT:    vmovupd 32(%rdi), %ymm1
    965 ; AVXONLY-NEXT:    retq
    966 ;
    967 ; AVX512-LABEL: test_v8f64_unaligned:
    968 ; AVX512:       # %bb.0: # %entry
    969 ; AVX512-NEXT:    vmovupd (%rdi), %zmm0
    970 ; AVX512-NEXT:    retq
    971 entry:
    972   %0 = load <8 x double>, <8 x double>* %V, align 4
    973   ret <8 x double> %0
    974 }
    975 
    976