; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s

; Codegen test for merging adjacent memory accesses. Every function below runs
; a loop of %count iterations (skipped when %count <= 0) that writes a run of
; neighbouring struct fields and then steps the destination pointer forward by
; one struct. The FileCheck directives in front of each function name the
; instructions that must (or must not) show up in the generated code.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; %struct.A: eight consecutive i8 fields (8 bytes in total).
; %struct.B: eight consecutive i32 fields (32 bytes in total).
%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }

; CHECK: merge_const_store
; save 1,2,3 ... as one big integer.
; CHECK: movabsq $578437695752307201
; CHECK: ret
;
; Stores the constant bytes 1..8 into the eight i8 fields of each %struct.A.
; The expected immediate 578437695752307201 is 0x0807060504030201, i.e. the
; eight byte values packed in little-endian order (the datalayout starts
; with "e").
define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  ; Guard: enter the loop only when %count is positive.
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  ; %i.02 is the iteration counter, %.01 the struct written in this iteration.
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  ; Eight byte-sized constant stores to fields 0..7, in ascending field order.
  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  store i8 5, i8* %6, align 1
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  ; Bump the counter, step to the next struct, and leave once %count is reached.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; No vectors because we use noimplicitfloat
; CHECK: merge_const_store_no_vec
; CHECK-NOT: vmovups
; CHECK: ret
;
; Zeroes all eight i32 fields of each %struct.B. The function carries the
; noimplicitfloat attribute, and the directives above require that no vmovups
; appears before the ret.
define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
  ; Guard: enter the loop only when %count is positive.
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  ; %i.02 is the iteration counter, %.01 the struct written in this iteration.
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
  ; Eight 4-byte-aligned zero stores to fields 0..7, in ascending field order.
  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 0, i32* %3, align 4
  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  store i32 0, i32* %4, align 4
  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  store i32 0, i32* %5, align 4
  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
  store i32 0, i32* %6, align 4
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
  store i32 0, i32* %7, align 4
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
  store i32 0, i32* %8, align 4
  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
  store i32 0, i32* %9, align 4
  ; Bump the counter, step to the next struct, and leave once %count is reached.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Move the constants using a single vector store.
; CHECK: merge_const_store_vec
; CHECK: vmovups
; CHECK: ret
;
; Zeroes all eight i32 fields of each %struct.B, exactly like
; merge_const_store_no_vec, but without the noimplicitfloat attribute. Here
; the directives above require a vmovups before the ret.
define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
  ; Guard: enter the loop only when %count is positive.
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  ; %i.02 is the iteration counter, %.01 the struct written in this iteration.
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
  ; Eight 4-byte-aligned zero stores to fields 0..7, in ascending field order.
  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 0, i32* %3, align 4
  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  store i32 0, i32* %4, align 4
  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  store i32 0, i32* %5, align 4
  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
  store i32 0, i32* %6, align 4
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
  store i32 0, i32* %7, align 4
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
  store i32 0, i32* %8, align 4
  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
  store i32 0, i32* %9, align 4
  ; Bump the counter, step to the next struct, and leave once %count is reached.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Move the first 4 constants as a single 32-bit integer store (movl). Move the rest as scalars.
; CHECK: merge_nonconst_store
; CHECK: movl $67305985
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
;
; Like merge_const_store, except that field 4 receives the argument %zz
; instead of a constant. The expected immediate 67305985 is 0x04030201, i.e.
; the constants for fields 0..3 packed in little-endian order; four byte
; moves are expected after it.
define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  ; Guard: enter the loop only when %count is positive.
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  ; %i.02 is the iteration counter, %.01 the struct written in this iteration.
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const;
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  ; Bump the counter, step to the next struct, and leave once %count is reached.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}


; Copies fields 0 and 1 (two adjacent i8 values) of %q into fields 0 and 1 of
; each destination struct. Both loads are issued before both stores; one
; 16-bit load and one 16-bit store are expected.
;CHECK: merge_loads_i16
; load:
;CHECK: movw
; store:
;CHECK: movw
;CHECK: ret
define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  ; Guard: enter the loop only when %count is positive.
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; Source addresses are computed once, ahead of the loop; %q never advances.
  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  ; %i.02 is the iteration counter, %.01 the destination struct.
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
  %5 = load i8* %2, align 1
  %6 = load i8* %3, align 1
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 %5, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 %6, i8* %8, align 1
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}

; The loads and the stores are interleaved. Can't merge them.
; Same copy as merge_loads_i16, but ordered load, store, load, store; four
; separate byte moves are expected.
;CHECK: no_merge_loads
;CHECK: movb
;CHECK: movb
;CHECK: movb
;CHECK: movb
;CHECK: ret
define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  ; Guard: enter the loop only when %count is positive.
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; Source addresses are computed once, ahead of the loop; %q never advances.
  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
  br label %a4

a4:                                       ; preds = %a4, %.lr.ph
  ; %i.02 is the iteration counter, %.01 the destination struct.
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
  %a5 = load i8* %2, align 1
  %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 %a5, i8* %a7, align 1
  %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  ; The second load comes after the first store; this is the interleaving.
  %a6 = load i8* %3, align 1
  store i8 %a6, i8* %a8, align 1
  %a9 = add nsw i32 %i.02, 1
  %a10 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %a9, %count
  br i1 %exitcond, label %._crit_edge, label %a4

._crit_edge:                                      ; preds = %a4, %0
  ret void
}


; Copies fields 0 and 1 (two adjacent i32 values) of %q into fields 0 and 1 of
; each destination struct. The loads and stores carry no explicit alignment.
; One 64-bit load and one 64-bit store are expected.
;CHECK: merge_loads_integer
; load:
;CHECK: movq
; store:
;CHECK: movq
;CHECK: ret
define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  ; Guard: enter the loop only when %count is positive.
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; Source addresses are computed once, ahead of the loop; %q never advances.
  %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  ; %i.02 is the iteration counter, %.01 the destination struct.
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]
  %5 = load i32* %2
  %6 = load i32* %3
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 %5, i32* %7
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 %6, i32* %8
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}


; Copies fields 0..3 (four adjacent i32 values) of %q into fields 0..3 of each
; destination struct. All four loads are issued before the four stores. One
; movups load and one movups store are expected.
;CHECK: merge_loads_vector
; load:
;CHECK: movups
; store:
;CHECK: movups
;CHECK: ret
define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  ; Guard: enter the loop only when %count is positive.
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; Source addresses are computed once, ahead of the loop; %q never advances.
  %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                       ; preds = %block4, %.lr.ph
  ; %i.02 is the iteration counter, %.01 the destination struct.
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  %b1 = load i32* %a2
  %b2 = load i32* %a3
  %b3 = load i32* %a4
  %b4 = load i32* %a5
  store i32 %b1, i32* %a7
  store i32 %b2, i32* %a8
  store i32 %b3, i32* %a9
  store i32 %b4, i32* %a10
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}

; Same copy as merge_loads_vector, but every load and store is marked
; "align 1". Four separate 32-bit loads followed by four separate 32-bit
; stores are expected.
;CHECK: merge_loads_no_align
; load:
;CHECK: movl
;CHECK: movl
;CHECK: movl
;CHECK: movl
; store:
;CHECK: movl
;CHECK: movl
;CHECK: movl
;CHECK: movl
;CHECK: ret
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  ; Guard: enter the loop only when %count is positive.
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; Source addresses are computed once, ahead of the loop; %q never advances.
  %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                       ; preds = %block4, %.lr.ph
  ; %i.02 is the iteration counter, %.01 the destination struct.
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  %b1 = load i32* %a2, align 1
  %b2 = load i32* %a3, align 1
  %b3 = load i32* %a4, align 1
  %b4 = load i32* %a5, align 1
  store i32 %b1, i32* %a7, align 1
  store i32 %b2, i32* %a8, align 1
  store i32 %b3, i32* %a9, align 1
  store i32 %b4, i32* %a10, align 1
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}