; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; %struct.A is eight consecutive bytes; %struct.B is eight consecutive i32s.
%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }

; Each loop iteration stores the constants 1..8 into the eight consecutive i8
; fields of one %struct.A and then advances to the next struct. The eight byte
; stores are expected to be merged into one 64-bit store of the immediate
; 578437695752307201 (0x0807060504030201).
;
; The function name is matched with a label directive (including the trailing
; colon) so that it cannot also match the longer names
; merge_const_store_no_vec / merge_const_store_vec, and so that the checks
; below stay confined to this function's output.
; CHECK-LABEL: merge_const_store:
; save 1,2,3 ... as one big integer.
; CHECK: movabsq $578437695752307201
; CHECK: ret
define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  ; Skip the loop entirely when %count <= 0.
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  ; %i.02 is the iteration counter, %.01 the struct written this iteration.
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  store i8 5, i8* %6, align 1
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  ; Advance the counter and step to the next struct.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; No vectors because we use noimplicitfloat
; CHECK-LABEL: merge_const_store_no_vec:
; CHECK-NOT: vmovups
; CHECK: ret
; Every loop iteration writes zero to each of the eight consecutive i32 fields
; of one %struct.B, then steps to the next struct. The function is marked
; noimplicitfloat, and the checks that accompany this test expect no vmovups
; in the generated code.
define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat {
entry:
  ; Nothing to do unless %count is positive.
  %has.work = icmp sgt i32 %count, 0
  br i1 %has.work, label %body, label %done
body:
  ; %iter counts completed iterations; %elt is the struct being zeroed.
  %iter = phi i32 [ %iter.next, %body ], [ 0, %entry ]
  %elt = phi %struct.B* [ %elt.next, %body ], [ %p, %entry ]
  %f0 = getelementptr inbounds %struct.B* %elt, i64 0, i32 0
  store i32 0, i32* %f0, align 4
  %f1 = getelementptr inbounds %struct.B* %elt, i64 0, i32 1
  store i32 0, i32* %f1, align 4
  %f2 = getelementptr inbounds %struct.B* %elt, i64 0, i32 2
  store i32 0, i32* %f2, align 4
  %f3 = getelementptr inbounds %struct.B* %elt, i64 0, i32 3
  store i32 0, i32* %f3, align 4
  %f4 = getelementptr inbounds %struct.B* %elt, i64 0, i32 4
  store i32 0, i32* %f4, align 4
  %f5 = getelementptr inbounds %struct.B* %elt, i64 0, i32 5
  store i32 0, i32* %f5, align 4
  %f6 = getelementptr inbounds %struct.B* %elt, i64 0, i32 6
  store i32 0, i32* %f6, align 4
  %f7 = getelementptr inbounds %struct.B* %elt, i64 0, i32 7
  store i32 0, i32* %f7, align 4
  ; Bump the counter and move on to the following struct.
  %iter.next = add nsw i32 %iter, 1
  %elt.next = getelementptr inbounds %struct.B* %elt, i64 1
  %at.end = icmp eq i32 %iter.next, %count
  br i1 %at.end, label %done, label %body
done:
  ret void
}

; Move the constants using a single vector store.
; The function name is matched with a label directive (including the trailing
; colon) so the vmovups/ret checks below are confined to this function.
; CHECK-LABEL: merge_const_store_vec:
; CHECK: vmovups
; CHECK: ret
;
; Each loop iteration zeroes the eight consecutive i32 fields (32 bytes) of
; one %struct.B and then advances to the next struct. Unlike the
; noimplicitfloat variant, vector registers may be used here, so the zero
; stores are expected to be emitted as a vmovups.
define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
  ; Skip the loop entirely when %count <= 0.
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  ; %i.02 is the iteration counter, %.01 the struct written this iteration.
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 0, i32* %3, align 4
  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  store i32 0, i32* %4, align 4
  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  store i32 0, i32* %5, align 4
  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
  store i32 0, i32* %6, align 4
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
  store i32 0, i32* %7, align 4
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
  store i32 0, i32* %8, align 4
  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
  store i32 0, i32* %9, align 4
  ; Advance the counter and step to the next struct.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Merge the first 4 constant bytes into a single 32-bit integer store. Store
; the remaining bytes individually.
; Bytes 0..3 are the constants 1,2,3,4 and are expected to become one 32-bit
; store of 67305985 (0x04030201). Byte 4 is the non-constant %zz, which ends
; the mergeable run; four byte stores are expected for the remainder.
; CHECK-LABEL: merge_nonconst_store:
; CHECK: movl $67305985
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  ; Skip the loop entirely when %count <= 0.
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  ; %i.02 is the iteration counter, %.01 the struct written this iteration.
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const;
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  ; Advance the counter and step to the next struct.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}


; Copies bytes 0 and 1 of %q into bytes 0 and 1 of each struct in %p. Both
; loads are issued before both stores, so one 16-bit load and one 16-bit store
; are expected.
;CHECK-LABEL: merge_loads_i16:
;  load:
;CHECK: movw
;  store:
;CHECK: movw
;CHECK: ret
define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; Source addresses are loop-invariant: bytes 0 and 1 of %q.
  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
  %5 = load i8* %2, align 1
  %6 = load i8* %3, align 1
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 %5, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 %6, i8* %8, align 1
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}

; The loads and the stores are interleaved. Can't merge them.
;CHECK-LABEL: no_merge_loads:
;CHECK: movb
;CHECK: movb
;CHECK: movb
;CHECK: movb
;CHECK: ret
define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
  br label %a4

a4:                                               ; preds = %a4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
  ; Order is load, store, load, store: the second load follows the first store.
  %a5 = load i8* %2, align 1
  %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 %a5, i8* %a7, align 1
  %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  %a6 = load i8* %3, align 1
  store i8 %a6, i8* %a8, align 1
  %a9 = add nsw i32 %i.02, 1
  %a10 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %a9, %count
  br i1 %exitcond, label %._crit_edge, label %a4

._crit_edge:                                      ; preds = %a4, %0
  ret void
}


; Copies i32 fields 0 and 1 of %q into fields 0 and 1 of each struct in %p.
; One 64-bit load and one 64-bit store are expected.
;CHECK-LABEL: merge_loads_integer:
;  load:
;CHECK: movq
;  store:
;CHECK: movq
;CHECK: ret
define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]
  %5 = load i32* %2
  %6 = load i32* %3
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 %5, i32* %7
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 %6, i32* %8
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}


; Copies i32 fields 0..3 of %q into fields 0..3 of each struct in %p. One
; vector load and one vector store are expected.
;CHECK-LABEL: merge_loads_vector:
;  load:
;CHECK: movups
;  store:
;CHECK: movups
;CHECK: ret
define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                           ; preds = %block4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  %b1 = load i32* %a2
  %b2 = load i32* %a3
  %b3 = load i32* %a4
  %b4 = load i32* %a5
  store i32 %b1, i32* %a7
  store i32 %b2, i32* %a8
  store i32 %b3, i32* %a9
  store i32 %b4, i32* %a10
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}

; Same copy as merge_loads_vector, but every load and store is only 1-byte
; aligned. Four separate 32-bit loads and four separate 32-bit stores are
; expected.
;CHECK-LABEL: merge_loads_no_align:
;  load:
;CHECK: movl
;CHECK: movl
;CHECK: movl
;CHECK: movl
;  store:
;CHECK: movl
;CHECK: movl
;CHECK: movl
;CHECK: movl
;CHECK: ret
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                           ; preds = %block4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  %b1 = load i32* %a2, align 1
  %b2 = load i32* %a3, align 1
  %b3 = load i32* %a4, align 1
  %b4 = load i32* %a5, align 1
  store i32 %b1, i32* %a7, align 1
  store i32 %b2, i32* %a8, align 1
  store i32 %b3, i32* %a9, align 1
  store i32 %b4, i32* %a10, align 1
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}

; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy.
; Per iteration: read an i64 index from %a, load c[index] and c[index + 1],
; and store them to two consecutive bytes of %b.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw    [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  ; %.09 = remaining iterations, %.08 = destination cursor, %.0 = index cursor.
  %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
  %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i64* %.0, i64 1
  %3 = load i64* %.0, align 1
  %4 = getelementptr inbounds i8* %c, i64 %3
  %5 = load i8* %4, align 1
  %6 = add i64 %3, 1
  %7 = getelementptr inbounds i8* %c, i64 %6
  %8 = load i8* %7, align 1
  store i8 %5, i8* %.08, align 1
  %9 = getelementptr inbounds i8* %.08, i64 1
  store i8 %8, i8* %9, align 1
  %10 = getelementptr inbounds i8* %.08, i64 2
  %11 = add nsw i32 %.09, -1
  %12 = icmp eq i32 %11, 0
  br i1 %12, label %13, label %1

; <label>:13
  ret void
}

; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy even if there are intermediate sign
; extensions.
; Here the index is an i8 that is sign-extended to i64 first; the +1 is then
; computed on the extended 64-bit value.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw    [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i8* %.0, i64 1
  %3 = load i8* %.0, align 1
  %4 = sext i8 %3 to i64
  %5 = getelementptr inbounds i8* %c, i64 %4
  %6 = load i8* %5, align 1
  %7 = add i64 %4, 1
  %8 = getelementptr inbounds i8* %c, i64 %7
  %9 = load i8* %8, align 1
  store i8 %6, i8* %.08, align 1
  %10 = getelementptr inbounds i8* %.08, i64 1
  store i8 %9, i8* %10, align 1
  %11 = getelementptr inbounds i8* %.08, i64 2
  %12 = add nsw i32 %.09, -1
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %14, label %1

; <label>:14
  ret void
}

; However, we can only ignore sign extensions when they are applied to all of
; the address computations. Below, the +1 is computed in i8 (where it may
; wrap) and only then sign-extended, so the two loads must not be merged.
; The negative patterns deliberately avoid pattern variables: a variable
; defined inside a pattern that must never match is never bound, so a later
; use of it would silently pick up the register captured in an earlier
; function.
; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
; CHECK-NOT: movw    (%{{.*}},%{{.*}}), %{{[a-z]+}}
; CHECK-NOT: movw    %{{[a-z]+}}, (%{{.*}})
define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i8* %.0, i64 1
  %3 = load i8* %.0, align 1
  %4 = sext i8 %3 to i64
  %5 = getelementptr inbounds i8* %c, i64 %4
  %6 = load i8* %5, align 1
  ; 8-bit add followed by sext: not equivalent to (sext %3) + 1 on overflow.
  %7 = add i8 %3, 1
  %wrap.4 = sext i8 %7 to i64
  %8 = getelementptr inbounds i8* %c, i64 %wrap.4
  %9 = load i8* %8, align 1
  store i8 %6, i8* %.08, align 1
  %10 = getelementptr inbounds i8* %.08, i64 1
  store i8 %9, i8* %10, align 1
  %11 = getelementptr inbounds i8* %.08, i64 2
  %12 = add nsw i32 %.09, -1
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %14, label %1

; <label>:14
  ret void
}