; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-linux -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=i686-linux -mattr=+avx    | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1
; RUN: llc < %s -mtriple=i686-linux -mattr=+avx2   | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx    | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2   | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
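;
; The RUN lines above cover six configurations: i686 and x86_64, each with
; SSE4.2, AVX, and AVX2. The shared X86/X64 and X86-AVX/X64-AVX prefixes let
; one set of check lines serve every configuration whose output matches.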

; PR27708

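; Both tests load a <7 x i64> (56 bytes, not a power of two), which has to be
; split into 16-byte (SSE) or 32-byte (AVX) chunks plus a scalar tail. The
; value is returned indirectly through a hidden pointer, which is why the
; generated code stores through %rdi (returned in %rax) on x86-64, and through
; the first stack argument on i686 (popped by retl $4).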
define <7 x i64> @load7_aligned(<7 x i64>* %x) {
; X86-SSE-LABEL: load7_aligned:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movaps (%ecx), %xmm0
; X86-SSE-NEXT:    movaps 16(%ecx), %xmm1
; X86-SSE-NEXT:    movaps 32(%ecx), %xmm2
; X86-SSE-NEXT:    movl 48(%ecx), %edx
; X86-SSE-NEXT:    movl 52(%ecx), %ecx
; X86-SSE-NEXT:    movl %ecx, 52(%eax)
; X86-SSE-NEXT:    movl %edx, 48(%eax)
; X86-SSE-NEXT:    movaps %xmm2, 32(%eax)
; X86-SSE-NEXT:    movaps %xmm1, 16(%eax)
; X86-SSE-NEXT:    movaps %xmm0, (%eax)
; X86-SSE-NEXT:    retl $4
;
; X86-AVX-LABEL: load7_aligned:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vmovaps (%ecx), %ymm0
; X86-AVX-NEXT:    vmovaps 32(%ecx), %ymm1
; X86-AVX-NEXT:    vmovaps %ymm0, (%eax)
; X86-AVX-NEXT:    vextractf128 $1, %ymm1, %xmm0
; X86-AVX-NEXT:    vextractps $1, %xmm0, 52(%eax)
; X86-AVX-NEXT:    vmovss %xmm0, 48(%eax)
; X86-AVX-NEXT:    vmovaps %xmm1, 32(%eax)
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl $4
;
; X64-SSE-LABEL: load7_aligned:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rsi), %xmm0
; X64-SSE-NEXT:    movaps 16(%rsi), %xmm1
; X64-SSE-NEXT:    movaps 32(%rsi), %xmm2
; X64-SSE-NEXT:    movq 48(%rsi), %rax
; X64-SSE-NEXT:    movq %rax, 48(%rdi)
; X64-SSE-NEXT:    movaps %xmm2, 32(%rdi)
; X64-SSE-NEXT:    movaps %xmm1, 16(%rdi)
; X64-SSE-NEXT:    movaps %xmm0, (%rdi)
; X64-SSE-NEXT:    movq %rdi, %rax
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load7_aligned:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps (%rsi), %ymm0
; X64-AVX-NEXT:    vmovaps 32(%rsi), %ymm1
; X64-AVX-NEXT:    vmovaps %ymm0, (%rdi)
; X64-AVX-NEXT:    vextractf128 $1, %ymm1, %xmm0
; X64-AVX-NEXT:    vmovlps %xmm0, 48(%rdi)
; X64-AVX-NEXT:    vmovaps %xmm1, 32(%rdi)
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  %x1 = load <7 x i64>, <7 x i64>* %x
  ret <7 x i64> %x1
}

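; Same load, but with align 1 on the IR load, so the loads must use the
; unaligned forms (movups/vmovups). The stores to the return slot keep using
; aligned moves, since that slot's alignment is unaffected by the source's.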
define <7 x i64> @load7_unaligned(<7 x i64>* %x) {
; X86-SSE-LABEL: load7_unaligned:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movups (%ecx), %xmm0
; X86-SSE-NEXT:    movups 16(%ecx), %xmm1
; X86-SSE-NEXT:    movups 32(%ecx), %xmm2
; X86-SSE-NEXT:    movl 48(%ecx), %edx
; X86-SSE-NEXT:    movl 52(%ecx), %ecx
; X86-SSE-NEXT:    movl %ecx, 52(%eax)
; X86-SSE-NEXT:    movl %edx, 48(%eax)
; X86-SSE-NEXT:    movaps %xmm2, 32(%eax)
; X86-SSE-NEXT:    movaps %xmm1, 16(%eax)
; X86-SSE-NEXT:    movaps %xmm0, (%eax)
; X86-SSE-NEXT:    retl $4
;
; X86-AVX-LABEL: load7_unaligned:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vmovups (%ecx), %ymm0
; X86-AVX-NEXT:    vmovups 32(%ecx), %xmm1
; X86-AVX-NEXT:    movl 48(%ecx), %edx
; X86-AVX-NEXT:    movl 52(%ecx), %ecx
; X86-AVX-NEXT:    movl %ecx, 52(%eax)
; X86-AVX-NEXT:    movl %edx, 48(%eax)
; X86-AVX-NEXT:    vmovaps %xmm1, 32(%eax)
; X86-AVX-NEXT:    vmovaps %ymm0, (%eax)
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl $4
;
; X64-SSE-LABEL: load7_unaligned:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups (%rsi), %xmm0
; X64-SSE-NEXT:    movups 16(%rsi), %xmm1
; X64-SSE-NEXT:    movups 32(%rsi), %xmm2
; X64-SSE-NEXT:    movq 48(%rsi), %rax
; X64-SSE-NEXT:    movq %rax, 48(%rdi)
; X64-SSE-NEXT:    movaps %xmm2, 32(%rdi)
; X64-SSE-NEXT:    movaps %xmm1, 16(%rdi)
; X64-SSE-NEXT:    movaps %xmm0, (%rdi)
; X64-SSE-NEXT:    movq %rdi, %rax
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load7_unaligned:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovups (%rsi), %ymm0
; X64-AVX-NEXT:    vmovups 32(%rsi), %xmm1
; X64-AVX-NEXT:    movq 48(%rsi), %rax
; X64-AVX-NEXT:    movq %rax, 48(%rdi)
; X64-AVX-NEXT:    vmovaps %xmm1, 32(%rdi)
; X64-AVX-NEXT:    vmovaps %ymm0, (%rdi)
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  %x1 = load <7 x i64>, <7 x i64>* %x, align 1
  ret <7 x i64> %x1
}