Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
      3 
      4 %structTy = type { i8, i32, i32 }
      5 
      6 @e = common global %structTy zeroinitializer, align 4
      7 
      8 ;; Ensure that MergeConsecutiveStores doesn't incorrectly reorder
      9 ;; store operations.  The first test stores in increasing address
     10 ;; order, the second in decreasing order -- but both should leave the
     11 ;; same result in memory in the end.
     12 
     13 define void @redundant_stores_merging() {
     14 ; CHECK-LABEL: redundant_stores_merging:
     15 ; CHECK:       # %bb.0:
     16 ; CHECK-NEXT:    movabsq $528280977409, %rax # imm = 0x7B00000001
     17 ; CHECK-NEXT:    movq %rax, e+{{.*}}(%rip)
     18 ; CHECK-NEXT:    movl $456, e+{{.*}}(%rip) # imm = 0x1C8
     19 ; CHECK-NEXT:    retq
        ;; Writes 1 to field 1 of @e, then 123 and 456 (in that order) to field 2.
        ;; The assertions above expect one 8-byte store of 0x7B00000001 (123 in
        ;; the upper 32 bits, 1 in the lower 32 bits) followed by the 4-byte store
        ;; of 456, so the later store to field 2 still lands last.
     20   store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
     21   store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
     22   store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
     23   ret void
     24 }
     25 
     26 ;; This variant tests PR25154.
     27 define void @redundant_stores_merging_reverse() {
     28 ; CHECK-LABEL: redundant_stores_merging_reverse:
     29 ; CHECK:       # %bb.0:
     30 ; CHECK-NEXT:    movabsq $528280977409, %rax # imm = 0x7B00000001
     31 ; CHECK-NEXT:    movq %rax, e+{{.*}}(%rip)
     32 ; CHECK-NEXT:    movl $456, e+{{.*}}(%rip) # imm = 0x1C8
     33 ; CHECK-NEXT:    retq
        ;; Writes 123 and then 456 to field 2 of @e, and only afterwards writes 1
        ;; to field 1. The assertions above expect an 8-byte store of 0x7B00000001
        ;; followed by the 4-byte store of 456; because the store of 456 is
        ;; emitted after the wide store, field 2 ends up holding 456.
     34   store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
     35   store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
     36   store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
     37   ret void
     38 }
     39 
     40 @b = common global [8 x i8] zeroinitializer, align 2
     41 
     42 ;; The 2-byte store to offset 3 overlaps the 2-byte store to offset 2;
     43 ;; these must not be reordered in MergeConsecutiveStores such that the
     44 ;; store to 3 comes first (e.g. by merging the stores to 0 and 2 into
     45 ;; a movl, after the store to 3).
     46 
     47 define void @overlapping_stores_merging() {
     48 ; CHECK-LABEL: overlapping_stores_merging:
     49 ; CHECK:       # %bb.0:
     50 ; CHECK-NEXT:    movl $1, {{.*}}(%rip)
     51 ; CHECK-NEXT:    movw $2, b+{{.*}}(%rip)
     52 ; CHECK-NEXT:    retq
        ;; Three i16 stores into @b, in program order: 0 at byte offset 2, 2 at
        ;; byte offset 3 (align 1, so it overlaps the previous store), and 1 at
        ;; byte offset 0. The assertions above expect a 4-byte store of 1 followed
        ;; by the 2-byte store of 2, i.e. the store of 2 must stay after the store
        ;; that writes the byte it overlaps.
     53   store i16 0, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 2) to i16*), align 2
     54   store i16 2, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 3) to i16*), align 1
     55   store i16 1, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 0) to i16*), align 2
     56   ret void
     57 }
     58 
     59 define void @extract_vector_store_16_consecutive_bytes(<2 x i64> %v, i8* %ptr) #0 {
     60 ; CHECK-LABEL: extract_vector_store_16_consecutive_bytes:
     61 ; CHECK:       # %bb.0:
     62 ; CHECK-NEXT:    vmovups %xmm0, (%rdi)
     63 ; CHECK-NEXT:    retq
        ;; Reinterprets %v as 16 bytes, extracts every byte, and stores byte i to
        ;; %ptr + i in ascending order. The assertions above expect all 16 byte
        ;; stores to be emitted as a single 16-byte vector store of %xmm0.
        ;; NOTE(review): attribute group #0 is not defined in the visible part of
        ;; this file -- confirm that is intentional.
     64   %bc = bitcast <2 x i64> %v to <16 x i8>
        ;; Extract lanes 0..15 of the byte vector.
     65   %ext00 = extractelement <16 x i8> %bc, i32 0
     66   %ext01 = extractelement <16 x i8> %bc, i32 1
     67   %ext02 = extractelement <16 x i8> %bc, i32 2
     68   %ext03 = extractelement <16 x i8> %bc, i32 3
     69   %ext04 = extractelement <16 x i8> %bc, i32 4
     70   %ext05 = extractelement <16 x i8> %bc, i32 5
     71   %ext06 = extractelement <16 x i8> %bc, i32 6
     72   %ext07 = extractelement <16 x i8> %bc, i32 7
     73   %ext08 = extractelement <16 x i8> %bc, i32 8
     74   %ext09 = extractelement <16 x i8> %bc, i32 9
     75   %ext10 = extractelement <16 x i8> %bc, i32 10
     76   %ext11 = extractelement <16 x i8> %bc, i32 11
     77   %ext12 = extractelement <16 x i8> %bc, i32 12
     78   %ext13 = extractelement <16 x i8> %bc, i32 13
     79   %ext14 = extractelement <16 x i8> %bc, i32 14
     80   %ext15 = extractelement <16 x i8> %bc, i32 15
        ;; Destination addresses %ptr + 0 .. %ptr + 15.
     81   %gep00 = getelementptr inbounds i8, i8* %ptr, i64 0
     82   %gep01 = getelementptr inbounds i8, i8* %ptr, i64 1
     83   %gep02 = getelementptr inbounds i8, i8* %ptr, i64 2
     84   %gep03 = getelementptr inbounds i8, i8* %ptr, i64 3
     85   %gep04 = getelementptr inbounds i8, i8* %ptr, i64 4
     86   %gep05 = getelementptr inbounds i8, i8* %ptr, i64 5
     87   %gep06 = getelementptr inbounds i8, i8* %ptr, i64 6
     88   %gep07 = getelementptr inbounds i8, i8* %ptr, i64 7
     89   %gep08 = getelementptr inbounds i8, i8* %ptr, i64 8
     90   %gep09 = getelementptr inbounds i8, i8* %ptr, i64 9
     91   %gep10 = getelementptr inbounds i8, i8* %ptr, i64 10
     92   %gep11 = getelementptr inbounds i8, i8* %ptr, i64 11
     93   %gep12 = getelementptr inbounds i8, i8* %ptr, i64 12
     94   %gep13 = getelementptr inbounds i8, i8* %ptr, i64 13
     95   %gep14 = getelementptr inbounds i8, i8* %ptr, i64 14
     96   %gep15 = getelementptr inbounds i8, i8* %ptr, i64 15
        ;; Store lane i to address i; each store is a 1-byte, align-1 store.
     97   store i8 %ext00, i8* %gep00, align 1
     98   store i8 %ext01, i8* %gep01, align 1
     99   store i8 %ext02, i8* %gep02, align 1
    100   store i8 %ext03, i8* %gep03, align 1
    101   store i8 %ext04, i8* %gep04, align 1
    102   store i8 %ext05, i8* %gep05, align 1
    103   store i8 %ext06, i8* %gep06, align 1
    104   store i8 %ext07, i8* %gep07, align 1
    105   store i8 %ext08, i8* %gep08, align 1
    106   store i8 %ext09, i8* %gep09, align 1
    107   store i8 %ext10, i8* %gep10, align 1
    108   store i8 %ext11, i8* %gep11, align 1
    109   store i8 %ext12, i8* %gep12, align 1
    110   store i8 %ext13, i8* %gep13, align 1
    111   store i8 %ext14, i8* %gep14, align 1
    112   store i8 %ext15, i8* %gep15, align 1
    113   ret void
    114 }
    115 
    116 ; PR34217 - https://bugs.llvm.org/show_bug.cgi?id=34217
    117 
    118 define void @extract_vector_store_32_consecutive_bytes(<4 x i64> %v, i8* %ptr) #0 {
    119 ; CHECK-LABEL: extract_vector_store_32_consecutive_bytes:
    120 ; CHECK:       # %bb.0:
    121 ; CHECK-NEXT:    vmovups %ymm0, (%rdi)
    122 ; CHECK-NEXT:    vzeroupper
    123 ; CHECK-NEXT:    retq
        ;; Reinterprets %v as 32 bytes, extracts every byte, and stores byte i to
        ;; %ptr + i in ascending order. The assertions above expect all 32 byte
        ;; stores to be emitted as a single 32-byte vector store of %ymm0,
        ;; followed by vzeroupper before the return.
        ;; NOTE(review): attribute group #0 is not defined in the visible part of
        ;; this file -- confirm that is intentional.
    124   %bc = bitcast <4 x i64> %v to <32 x i8>
        ;; Extract lanes 0..31 of the byte vector.
    125   %ext00 = extractelement <32 x i8> %bc, i32 0
    126   %ext01 = extractelement <32 x i8> %bc, i32 1
    127   %ext02 = extractelement <32 x i8> %bc, i32 2
    128   %ext03 = extractelement <32 x i8> %bc, i32 3
    129   %ext04 = extractelement <32 x i8> %bc, i32 4
    130   %ext05 = extractelement <32 x i8> %bc, i32 5
    131   %ext06 = extractelement <32 x i8> %bc, i32 6
    132   %ext07 = extractelement <32 x i8> %bc, i32 7
    133   %ext08 = extractelement <32 x i8> %bc, i32 8
    134   %ext09 = extractelement <32 x i8> %bc, i32 9
    135   %ext10 = extractelement <32 x i8> %bc, i32 10
    136   %ext11 = extractelement <32 x i8> %bc, i32 11
    137   %ext12 = extractelement <32 x i8> %bc, i32 12
    138   %ext13 = extractelement <32 x i8> %bc, i32 13
    139   %ext14 = extractelement <32 x i8> %bc, i32 14
    140   %ext15 = extractelement <32 x i8> %bc, i32 15
    141   %ext16 = extractelement <32 x i8> %bc, i32 16
    142   %ext17 = extractelement <32 x i8> %bc, i32 17
    143   %ext18 = extractelement <32 x i8> %bc, i32 18
    144   %ext19 = extractelement <32 x i8> %bc, i32 19
    145   %ext20 = extractelement <32 x i8> %bc, i32 20
    146   %ext21 = extractelement <32 x i8> %bc, i32 21
    147   %ext22 = extractelement <32 x i8> %bc, i32 22
    148   %ext23 = extractelement <32 x i8> %bc, i32 23
    149   %ext24 = extractelement <32 x i8> %bc, i32 24
    150   %ext25 = extractelement <32 x i8> %bc, i32 25
    151   %ext26 = extractelement <32 x i8> %bc, i32 26
    152   %ext27 = extractelement <32 x i8> %bc, i32 27
    153   %ext28 = extractelement <32 x i8> %bc, i32 28
    154   %ext29 = extractelement <32 x i8> %bc, i32 29
    155   %ext30 = extractelement <32 x i8> %bc, i32 30
    156   %ext31 = extractelement <32 x i8> %bc, i32 31
        ;; Destination addresses %ptr + 0 .. %ptr + 31.
    157   %gep00 = getelementptr inbounds i8, i8* %ptr, i64 0
    158   %gep01 = getelementptr inbounds i8, i8* %ptr, i64 1
    159   %gep02 = getelementptr inbounds i8, i8* %ptr, i64 2
    160   %gep03 = getelementptr inbounds i8, i8* %ptr, i64 3
    161   %gep04 = getelementptr inbounds i8, i8* %ptr, i64 4
    162   %gep05 = getelementptr inbounds i8, i8* %ptr, i64 5
    163   %gep06 = getelementptr inbounds i8, i8* %ptr, i64 6
    164   %gep07 = getelementptr inbounds i8, i8* %ptr, i64 7
    165   %gep08 = getelementptr inbounds i8, i8* %ptr, i64 8
    166   %gep09 = getelementptr inbounds i8, i8* %ptr, i64 9
    167   %gep10 = getelementptr inbounds i8, i8* %ptr, i64 10
    168   %gep11 = getelementptr inbounds i8, i8* %ptr, i64 11
    169   %gep12 = getelementptr inbounds i8, i8* %ptr, i64 12
    170   %gep13 = getelementptr inbounds i8, i8* %ptr, i64 13
    171   %gep14 = getelementptr inbounds i8, i8* %ptr, i64 14
    172   %gep15 = getelementptr inbounds i8, i8* %ptr, i64 15
    173   %gep16 = getelementptr inbounds i8, i8* %ptr, i64 16
    174   %gep17 = getelementptr inbounds i8, i8* %ptr, i64 17
    175   %gep18 = getelementptr inbounds i8, i8* %ptr, i64 18
    176   %gep19 = getelementptr inbounds i8, i8* %ptr, i64 19
    177   %gep20 = getelementptr inbounds i8, i8* %ptr, i64 20
    178   %gep21 = getelementptr inbounds i8, i8* %ptr, i64 21
    179   %gep22 = getelementptr inbounds i8, i8* %ptr, i64 22
    180   %gep23 = getelementptr inbounds i8, i8* %ptr, i64 23
    181   %gep24 = getelementptr inbounds i8, i8* %ptr, i64 24
    182   %gep25 = getelementptr inbounds i8, i8* %ptr, i64 25
    183   %gep26 = getelementptr inbounds i8, i8* %ptr, i64 26
    184   %gep27 = getelementptr inbounds i8, i8* %ptr, i64 27
    185   %gep28 = getelementptr inbounds i8, i8* %ptr, i64 28
    186   %gep29 = getelementptr inbounds i8, i8* %ptr, i64 29
    187   %gep30 = getelementptr inbounds i8, i8* %ptr, i64 30
    188   %gep31 = getelementptr inbounds i8, i8* %ptr, i64 31
        ;; Store lane i to address i; each store is a 1-byte, align-1 store.
    189   store i8 %ext00, i8* %gep00, align 1
    190   store i8 %ext01, i8* %gep01, align 1
    191   store i8 %ext02, i8* %gep02, align 1
    192   store i8 %ext03, i8* %gep03, align 1
    193   store i8 %ext04, i8* %gep04, align 1
    194   store i8 %ext05, i8* %gep05, align 1
    195   store i8 %ext06, i8* %gep06, align 1
    196   store i8 %ext07, i8* %gep07, align 1
    197   store i8 %ext08, i8* %gep08, align 1
    198   store i8 %ext09, i8* %gep09, align 1
    199   store i8 %ext10, i8* %gep10, align 1
    200   store i8 %ext11, i8* %gep11, align 1
    201   store i8 %ext12, i8* %gep12, align 1
    202   store i8 %ext13, i8* %gep13, align 1
    203   store i8 %ext14, i8* %gep14, align 1
    204   store i8 %ext15, i8* %gep15, align 1
    205   store i8 %ext16, i8* %gep16, align 1
    206   store i8 %ext17, i8* %gep17, align 1
    207   store i8 %ext18, i8* %gep18, align 1
    208   store i8 %ext19, i8* %gep19, align 1
    209   store i8 %ext20, i8* %gep20, align 1
    210   store i8 %ext21, i8* %gep21, align 1
    211   store i8 %ext22, i8* %gep22, align 1
    212   store i8 %ext23, i8* %gep23, align 1
    213   store i8 %ext24, i8* %gep24, align 1
    214   store i8 %ext25, i8* %gep25, align 1
    215   store i8 %ext26, i8* %gep26, align 1
    216   store i8 %ext27, i8* %gep27, align 1
    217   store i8 %ext28, i8* %gep28, align 1
    218   store i8 %ext29, i8* %gep29, align 1
    219   store i8 %ext30, i8* %gep30, align 1
    220   store i8 %ext31, i8* %gep31, align 1
    221   ret void
    222 }
    223 
    224