; (Code-browser navigation header removed during export; this file is an
;  LLVM X86 codegen test for merging consecutive constant stores.)
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown   -mattr=avx | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64
      4 
define void @big_nonzero_16_bytes(i32* nocapture %a) {
; X32-LABEL: big_nonzero_16_bytes:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,3,4]
; X32-NEXT:    vmovups %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_16_bytes:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,3,4]
; X64-NEXT:    vmovups %xmm0, (%rdi)
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2
  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 3

; Four adjacent 4-byte stores of distinct nonzero constants. The CHECK
; lines above verify that the backend merges them into a single unaligned
; 16-byte vector store (vmovups) fed from a constant-pool load, on both
; 32-bit and 64-bit targets.
  store i32 1, i32* %a, align 4
  store i32 2, i32* %arrayidx1, align 4
  store i32 3, i32* %arrayidx2, align 4
  store i32 4, i32* %arrayidx3, align 4
  ret void
}
     28 
     29 ; TODO: We assumed that two 64-bit stores were better than 1 vector load and 1 vector store.
     30 ; But if the 64-bit constants can't be represented as sign-extended 32-bit constants, then
     31 ; it takes extra instructions to do this in scalar.
     32 
define void @big_nonzero_16_bytes_big64bit_constants(i64* nocapture %a) {
; X32-LABEL: big_nonzero_16_bytes_big64bit_constants:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,1,3]
; X32-NEXT:    vmovups %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_16_bytes_big64bit_constants:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $4294967297, %rax # imm = 0x100000001
; X64-NEXT:    movq %rax, (%rdi)
; X64-NEXT:    movabsq $12884901889, %rax # imm = 0x300000001
; X64-NEXT:    movq %rax, 8(%rdi)
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 1

; Two adjacent 8-byte stores whose constants do NOT fit in a sign-extended
; 32-bit immediate. Per the TODO above: X32 merges to one vector store, but
; X64 currently keeps two scalar stores, each needing a movabsq to
; materialize the 64-bit immediate (checked above).
  store i64 4294967297, i64* %a
  store i64 12884901889, i64* %arrayidx1
  ret void
}
     54 
     55 ; Splats may be an opportunity to use a broadcast op.
     56 
define void @big_nonzero_32_bytes_splat(i32* nocapture %a) {
; X32-LABEL: big_nonzero_32_bytes_splat:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42]
; X32-NEXT:    vmovups %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_32_bytes_splat:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42]
; X64-NEXT:    vmovups %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2
  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 3
  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 4
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 5
  %arrayidx6 = getelementptr inbounds i32, i32* %a, i64 6
  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 7

; Eight adjacent 4-byte stores of the same constant (a splat). The checks
; above show they merge into one 32-byte ymm store of a splatted
; constant-pool vector, followed by vzeroupper before returning (per the
; comment above, a broadcast op would be a further opportunity here).
  store i32 42, i32* %a, align 4
  store i32 42, i32* %arrayidx1, align 4
  store i32 42, i32* %arrayidx2, align 4
  store i32 42, i32* %arrayidx3, align 4
  store i32 42, i32* %arrayidx4, align 4
  store i32 42, i32* %arrayidx5, align 4
  store i32 42, i32* %arrayidx6, align 4
  store i32 42, i32* %arrayidx7, align 4
  ret void
}
     90 
     91 ; Verify that we choose the best-sized store(s) for each chunk.
     92 
define void @big_nonzero_63_bytes(i8* nocapture %a) {
; X32-LABEL: big_nonzero_63_bytes:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [1,0,2,0,3,0,4,0]
; X32-NEXT:    vmovups %ymm0, (%eax)
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [5,0,6,0]
; X32-NEXT:    vmovups %xmm0, 32(%eax)
; X32-NEXT:    movl $0, 52(%eax)
; X32-NEXT:    movl $7, 48(%eax)
; X32-NEXT:    movl $8, 56(%eax)
; X32-NEXT:    movw $9, 60(%eax)
; X32-NEXT:    movb $10, 62(%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_63_bytes:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,3,4]
; X64-NEXT:    vmovups %ymm0, (%rdi)
; X64-NEXT:    movq $5, 32(%rdi)
; X64-NEXT:    movq $6, 40(%rdi)
; X64-NEXT:    movq $7, 48(%rdi)
; X64-NEXT:    movl $8, 56(%rdi)
; X64-NEXT:    movw $9, 60(%rdi)
; X64-NEXT:    movb $10, 62(%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
; Precompute pointers for a 63-byte run of constant stores:
; seven i64 slots (offsets 0..55), then an i32 at byte 56, an i16 at
; byte 60, and an i8 at byte 62.
  %a8 = bitcast i8* %a to i64*
  %arrayidx8 = getelementptr inbounds i64, i64* %a8, i64 1
  %arrayidx16 = getelementptr inbounds i64, i64* %a8, i64 2
  %arrayidx24 = getelementptr inbounds i64, i64* %a8, i64 3
  %arrayidx32 = getelementptr inbounds i64, i64* %a8, i64 4
  %arrayidx40 = getelementptr inbounds i64, i64* %a8, i64 5
  %arrayidx48 = getelementptr inbounds i64, i64* %a8, i64 6
  %a4 = bitcast i8* %a to i32*
  %arrayidx56 = getelementptr inbounds i32, i32* %a4, i64 14
  %a2 = bitcast i8* %a to i16*
  %arrayidx60 = getelementptr inbounds i16, i16* %a2, i64 30
  %arrayidx62 = getelementptr inbounds i8, i8* %a, i64 62

; The checks above verify chunk-size selection: the first 32 bytes become
; one ymm store on both targets; the remainder is covered by an xmm store
; plus scalar stores on X32, and by qword/dword/word/byte scalar stores on
; X64 (where the i64 constants fit in sign-extended 32-bit immediates).
  store i64 1, i64* %a8
  store i64 2, i64* %arrayidx8
  store i64 3, i64* %arrayidx16
  store i64 4, i64* %arrayidx24
  store i64 5, i64* %arrayidx32
  store i64 6, i64* %arrayidx40
  store i64 7, i64* %arrayidx48
  store i32 8, i32* %arrayidx56
  store i16 9, i16* %arrayidx60
  store i8 10, i8* %arrayidx62
  ret void
}
    146 
    147