; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s

;; Three-field aggregate; the tests below only write fields 1 and 2
;; (the two i32 members).
%structTy = type { i8, i32, i32 }

@e = common global %structTy zeroinitializer, align 4

;; Ensure that MergeConsecutiveStores doesn't incorrectly reorder
;; store operations. The first test stores in increasing address
;; order, the second in decreasing -- but in both cases should have
;; the same result in memory in the end.

;; Writes 1 to field 1 of @e, then 123 and finally 456 to field 2.
;; The expected code merges the constants 1 and 123 into one 64-bit
;; immediate (0x7B00000001: low half 1, high half 0x7B = 123) written
;; with a single movq, and then emits the 32-bit store of 456
;; afterwards, so 456 is the last value written by the later store.
define void @redundant_stores_merging() {
; CHECK-LABEL: redundant_stores_merging:
; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001
; CHECK-NEXT: movq %rax, e+{{.*}}(%rip)
; CHECK-NEXT: movl $456, e+{{.*}}(%rip) # imm = 0x1C8
; CHECK-NEXT: retq
  store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
  store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
  store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
  ret void
}

;; This variant tests PR25154.
;; Same three constant stores as @redundant_stores_merging, but the two
;; stores to field 2 (123, then 456) are issued before the store of 1 to
;; field 1. The expected code is identical to the forward variant: one
;; merged 64-bit store of 0x7B00000001 followed by the 32-bit store of
;; 456.
define void @redundant_stores_merging_reverse() {
; CHECK-LABEL: redundant_stores_merging_reverse:
; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001
; CHECK-NEXT: movq %rax, e+{{.*}}(%rip)
; CHECK-NEXT: movl $456, e+{{.*}}(%rip) # imm = 0x1C8
; CHECK-NEXT: retq
  store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
  store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
  store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
  ret void
}

@b = common global [8 x i8] zeroinitializer, align 2

;; The 2-byte store to offset 3 overlaps the 2-byte store to offset 2;
;; these must not be reordered in MergeConsecutiveStores such that the
;; store to 3 comes first (e.g. by merging the stores to 0 and 2 into
;; a movl, after the store to 3).

;; IR order: i16 0 -> bytes 2..3, i16 2 -> bytes 3..4, i16 1 -> bytes 0..1.
;; The expected code is a 32-bit store of the constant 1 (consistent with
;; merging the i16 1 at offset 0 and the i16 0 at offset 2) followed by
;; the 16-bit store of 2, i.e. the overlapping store to offset 3 still
;; lands after the store that covers offset 2.
define void @overlapping_stores_merging() {
; CHECK-LABEL: overlapping_stores_merging:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $1, {{.*}}(%rip)
; CHECK-NEXT: movw $2, b+{{.*}}(%rip)
; CHECK-NEXT: retq
  store i16 0, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 2) to i16*), align 2
  store i16 2, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 3) to i16*), align 1
  store i16 1, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 0) to i16*), align 2
  ret void
}

;; Reinterprets %v as 16 bytes, extracts every byte lane N (0..15) and
;; stores it with align 1 to %ptr + N, in ascending order. The expected
;; code is a single unaligned 16-byte vector store of xmm0 to (%rdi).
;; NOTE(review): attribute group #0 is referenced here but no definition
;; is visible in this part of the file -- confirm it is defined elsewhere
;; or intentionally left empty.
define void @extract_vector_store_16_consecutive_bytes(<2 x i64> %v, i8* %ptr) #0 {
; CHECK-LABEL: extract_vector_store_16_consecutive_bytes:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %xmm0, (%rdi)
; CHECK-NEXT: retq
  %bc = bitcast <2 x i64> %v to <16 x i8>
  %ext00 = extractelement <16 x i8> %bc, i32 0
  %ext01 = extractelement <16 x i8> %bc, i32 1
  %ext02 = extractelement <16 x i8> %bc, i32 2
  %ext03 = extractelement <16 x i8> %bc, i32 3
  %ext04 = extractelement <16 x i8> %bc, i32 4
  %ext05 = extractelement <16 x i8> %bc, i32 5
  %ext06 = extractelement <16 x i8> %bc, i32 6
  %ext07 = extractelement <16 x i8> %bc, i32 7
  %ext08 = extractelement <16 x i8> %bc, i32 8
  %ext09 = extractelement <16 x i8> %bc, i32 9
  %ext10 = extractelement <16 x i8> %bc, i32 10
  %ext11 = extractelement <16 x i8> %bc, i32 11
  %ext12 = extractelement <16 x i8> %bc, i32 12
  %ext13 = extractelement <16 x i8> %bc, i32 13
  %ext14 = extractelement <16 x i8> %bc, i32 14
  %ext15 = extractelement <16 x i8> %bc, i32 15
  %gep00 = getelementptr inbounds i8, i8* %ptr, i64 0
  %gep01 = getelementptr inbounds i8, i8* %ptr, i64 1
  %gep02 = getelementptr inbounds i8, i8* %ptr, i64 2
  %gep03 = getelementptr inbounds i8, i8* %ptr, i64 3
  %gep04 = getelementptr inbounds i8, i8* %ptr, i64 4
  %gep05 = getelementptr inbounds i8, i8* %ptr, i64 5
  %gep06 = getelementptr inbounds i8, i8* %ptr, i64 6
  %gep07 = getelementptr inbounds i8, i8* %ptr, i64 7
  %gep08 = getelementptr inbounds i8, i8* %ptr, i64 8
  %gep09 = getelementptr inbounds i8, i8* %ptr, i64 9
  %gep10 = getelementptr inbounds i8, i8* %ptr, i64 10
  %gep11 = getelementptr inbounds i8, i8* %ptr, i64 11
  %gep12 = getelementptr inbounds i8, i8* %ptr, i64 12
  %gep13 = getelementptr inbounds i8, i8* %ptr, i64 13
  %gep14 = getelementptr inbounds i8, i8* %ptr, i64 14
  %gep15 = getelementptr inbounds i8, i8* %ptr, i64 15
  store i8 %ext00, i8* %gep00, align 1
  store i8 %ext01, i8* %gep01, align 1
  store i8 %ext02, i8* %gep02, align 1
  store i8 %ext03, i8* %gep03, align 1
  store i8 %ext04, i8* %gep04, align 1
  store i8 %ext05, i8* %gep05, align 1
  store i8 %ext06, i8* %gep06, align 1
  store i8 %ext07, i8* %gep07, align 1
  store i8 %ext08, i8* %gep08, align 1
  store i8 %ext09, i8* %gep09, align 1
  store i8 %ext10, i8* %gep10, align 1
  store i8 %ext11, i8* %gep11, align 1
  store i8 %ext12, i8* %gep12, align 1
  store i8 %ext13, i8* %gep13, align 1
  store i8 %ext14, i8* %gep14, align 1
  store i8 %ext15, i8* %gep15, align 1
  ret void
}

; PR34217 - https://bugs.llvm.org/show_bug.cgi?id=34217

;; 32-byte counterpart of the test above: %v is reinterpreted as 32 bytes
;; and byte lane N (0..31) is stored with align 1 to %ptr + N, in
;; ascending order. The expected code is a single unaligned 32-byte
;; vector store of ymm0 to (%rdi), followed by vzeroupper before the
;; return.
;; NOTE(review): attribute group #0 is referenced here but no definition
;; is visible in this part of the file -- confirm it is defined elsewhere
;; or intentionally left empty.
define void @extract_vector_store_32_consecutive_bytes(<4 x i64> %v, i8* %ptr) #0 {
; CHECK-LABEL: extract_vector_store_32_consecutive_bytes:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %bc = bitcast <4 x i64> %v to <32 x i8>
  %ext00 = extractelement <32 x i8> %bc, i32 0
  %ext01 = extractelement <32 x i8> %bc, i32 1
  %ext02 = extractelement <32 x i8> %bc, i32 2
  %ext03 = extractelement <32 x i8> %bc, i32 3
  %ext04 = extractelement <32 x i8> %bc, i32 4
  %ext05 = extractelement <32 x i8> %bc, i32 5
  %ext06 = extractelement <32 x i8> %bc, i32 6
  %ext07 = extractelement <32 x i8> %bc, i32 7
  %ext08 = extractelement <32 x i8> %bc, i32 8
  %ext09 = extractelement <32 x i8> %bc, i32 9
  %ext10 = extractelement <32 x i8> %bc, i32 10
  %ext11 = extractelement <32 x i8> %bc, i32 11
  %ext12 = extractelement <32 x i8> %bc, i32 12
  %ext13 = extractelement <32 x i8> %bc, i32 13
  %ext14 = extractelement <32 x i8> %bc, i32 14
  %ext15 = extractelement <32 x i8> %bc, i32 15
  %ext16 = extractelement <32 x i8> %bc, i32 16
  %ext17 = extractelement <32 x i8> %bc, i32 17
  %ext18 = extractelement <32 x i8> %bc, i32 18
  %ext19 = extractelement <32 x i8> %bc, i32 19
  %ext20 = extractelement <32 x i8> %bc, i32 20
  %ext21 = extractelement <32 x i8> %bc, i32 21
  %ext22 = extractelement <32 x i8> %bc, i32 22
  %ext23 = extractelement <32 x i8> %bc, i32 23
  %ext24 = extractelement <32 x i8> %bc, i32 24
  %ext25 = extractelement <32 x i8> %bc, i32 25
  %ext26 = extractelement <32 x i8> %bc, i32 26
  %ext27 = extractelement <32 x i8> %bc, i32 27
  %ext28 = extractelement <32 x i8> %bc, i32 28
  %ext29 = extractelement <32 x i8> %bc, i32 29
  %ext30 = extractelement <32 x i8> %bc, i32 30
  %ext31 = extractelement <32 x i8> %bc, i32 31
  %gep00 = getelementptr inbounds i8, i8* %ptr, i64 0
  %gep01 = getelementptr inbounds i8, i8* %ptr, i64 1
  %gep02 = getelementptr inbounds i8, i8* %ptr, i64 2
  %gep03 = getelementptr inbounds i8, i8* %ptr, i64 3
  %gep04 = getelementptr inbounds i8, i8* %ptr, i64 4
  %gep05 = getelementptr inbounds i8, i8* %ptr, i64 5
  %gep06 = getelementptr inbounds i8, i8* %ptr, i64 6
  %gep07 = getelementptr inbounds i8, i8* %ptr, i64 7
  %gep08 = getelementptr inbounds i8, i8* %ptr, i64 8
  %gep09 = getelementptr inbounds i8, i8* %ptr, i64 9
  %gep10 = getelementptr inbounds i8, i8* %ptr, i64 10
  %gep11 = getelementptr inbounds i8, i8* %ptr, i64 11
  %gep12 = getelementptr inbounds i8, i8* %ptr, i64 12
  %gep13 = getelementptr inbounds i8, i8* %ptr, i64 13
  %gep14 = getelementptr inbounds i8, i8* %ptr, i64 14
  %gep15 = getelementptr inbounds i8, i8* %ptr, i64 15
  %gep16 = getelementptr inbounds i8, i8* %ptr, i64 16
  %gep17 = getelementptr inbounds i8, i8* %ptr, i64 17
  %gep18 = getelementptr inbounds i8, i8* %ptr, i64 18
  %gep19 = getelementptr inbounds i8, i8* %ptr, i64 19
  %gep20 = getelementptr inbounds i8, i8* %ptr, i64 20
  %gep21 = getelementptr inbounds i8, i8* %ptr, i64 21
  %gep22 = getelementptr inbounds i8, i8* %ptr, i64 22
  %gep23 = getelementptr inbounds i8, i8* %ptr, i64 23
  %gep24 = getelementptr inbounds i8, i8* %ptr, i64 24
  %gep25 = getelementptr inbounds i8, i8* %ptr, i64 25
  %gep26 = getelementptr inbounds i8, i8* %ptr, i64 26
  %gep27 = getelementptr inbounds i8, i8* %ptr, i64 27
  %gep28 = getelementptr inbounds i8, i8* %ptr, i64 28
  %gep29 = getelementptr inbounds i8, i8* %ptr, i64 29
  %gep30 = getelementptr inbounds i8, i8* %ptr, i64 30
  %gep31 = getelementptr inbounds i8, i8* %ptr, i64 31
  store i8 %ext00, i8* %gep00, align 1
  store i8 %ext01, i8* %gep01, align 1
  store i8 %ext02, i8* %gep02, align 1
  store i8 %ext03, i8* %gep03, align 1
  store i8 %ext04, i8* %gep04, align 1
  store i8 %ext05, i8* %gep05, align 1
  store i8 %ext06, i8* %gep06, align 1
  store i8 %ext07, i8* %gep07, align 1
  store i8 %ext08, i8* %gep08, align 1
  store i8 %ext09, i8* %gep09, align 1
  store i8 %ext10, i8* %gep10, align 1
  store i8 %ext11, i8* %gep11, align 1
  store i8 %ext12, i8* %gep12, align 1
  store i8 %ext13, i8* %gep13, align 1
  store i8 %ext14, i8* %gep14, align 1
  store i8 %ext15, i8* %gep15, align 1
  store i8 %ext16, i8* %gep16, align 1
  store i8 %ext17, i8* %gep17, align 1
  store i8 %ext18, i8* %gep18, align 1
  store i8 %ext19, i8* %gep19, align 1
  store i8 %ext20, i8* %gep20, align 1
  store i8 %ext21, i8* %gep21, align 1
  store i8 %ext22, i8* %gep22, align 1
  store i8 %ext23, i8* %gep23, align 1
  store i8 %ext24, i8* %gep24, align 1
  store i8 %ext25, i8* %gep25, align 1
  store i8 %ext26, i8* %gep26, align 1
  store i8 %ext27, i8* %gep27, align 1
  store i8 %ext28, i8* %gep28, align 1
  store i8 %ext29, i8* %gep29, align 1
  store i8 %ext30, i8* %gep30, align 1
  store i8 %ext31, i8* %gep31, align 1
  ret void
}