; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; %struct.A is eight consecutive bytes; %struct.B is eight consecutive i32s.
%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }

; The eight constant byte stores 1..8 are expected to be saved as one big
; integer: 578437695752307201 == 0x0807060504030201.
; CHECK-LABEL: merge_const_store:
; CHECK: movabsq $578437695752307201
; CHECK: ret
define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  store i8 5, i8* %6, align 1
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; No vectors because we use noimplicitfloat.
; The label match confines the negative check to this function's body.
; CHECK-LABEL: merge_const_store_no_vec:
; CHECK-NOT: vmovups
; CHECK: ret
define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 0, i32* %3, align 4
  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  store i32 0, i32* %4, align 4
  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  store i32 0, i32* %5, align 4
  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
  store i32 0, i32* %6, align 4
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
  store i32 0, i32* %7, align 4
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
  store i32 0, i32* %8, align 4
  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
  store i32 0, i32* %9, align 4
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Move the constants using a single vector store.
; Eight consecutive zero i32 stores; with AVX available and no
; noimplicitfloat attribute, an unaligned vector store is expected.
; CHECK-LABEL: merge_const_store_vec:
; CHECK: vmovups
; CHECK: ret
define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 0, i32* %3, align 4
  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  store i32 0, i32* %4, align 4
  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  store i32 0, i32* %5, align 4
  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
  store i32 0, i32* %6, align 4
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
  store i32 0, i32* %7, align 4
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
  store i32 0, i32* %8, align 4
  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
  store i32 0, i32* %9, align 4
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Move the first 4 constants as a single 32-bit integer. Move the rest as scalars.
; 67305985 == 0x04030201: the four constant bytes that precede the
; non-constant store are merged into one 32-bit immediate store.
; CHECK-LABEL: merge_nonconst_store:
; CHECK: movl $67305985
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const;
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}


; Two adjacent byte loads followed by two adjacent byte stores: expect one
; 16-bit load and one 16-bit store.
; CHECK-LABEL: merge_loads_i16:
;  load:
; CHECK: movw
;  store:
; CHECK: movw
; CHECK: ret
define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
  %5 = load i8* %2, align 1
  %6 = load i8* %3, align 1
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 %5, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 %6, i8* %8, align 1
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}

; The loads and the stores are interleaved. Can't merge them.
; CHECK-LABEL: no_merge_loads:
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
  br label %a4

a4:                                               ; preds = %a4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
  %a5 = load i8* %2, align 1
  %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 %a5, i8* %a7, align 1
  %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  ; This second load comes after the first store, which is the interleaving
  ; the test is about.
  %a6 = load i8* %3, align 1
  store i8 %a6, i8* %a8, align 1
  %a9 = add nsw i32 %i.02, 1
  %a10 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %a9, %count
  br i1 %exitcond, label %._crit_edge, label %a4

._crit_edge:                                      ; preds = %a4, %0
  ret void
}


; Two adjacent i32 loads and stores: expect one 64-bit load and one 64-bit
; store.
; CHECK-LABEL: merge_loads_integer:
;  load:
; CHECK: movq
;  store:
; CHECK: movq
; CHECK: ret
define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]
  %5 = load i32* %2
  %6 = load i32* %3
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 %5, i32* %7
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 %6, i32* %8
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}


; Four adjacent i32 loads and stores: expect one unaligned vector load and
; one unaligned vector store (the pattern also matches the AVX "vmovups").
; CHECK-LABEL: merge_loads_vector:
;  load:
; CHECK: movups
;  store:
; CHECK: movups
; CHECK: ret
define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                           ; preds = %block4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  %b1 = load i32* %a2
  %b2 = load i32* %a3
  %b3 = load i32* %a4
  %b4 = load i32* %a5
  store i32 %b1, i32* %a7
  store i32 %b2, i32* %a8
  store i32 %b3, i32* %a9
  store i32 %b4, i32* %a10
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}

; Same shape as merge_loads_vector, but every access is "align 1"; four
; separate 32-bit loads and four separate 32-bit stores are expected.
; CHECK-LABEL: merge_loads_no_align:
;  load:
; CHECK: movl
; CHECK: movl
; CHECK: movl
; CHECK: movl
;  store:
; CHECK: movl
; CHECK: movl
; CHECK: movl
; CHECK: movl
; CHECK: ret
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                           ; preds = %block4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  %b1 = load i32* %a2, align 1
  %b2 = load i32* %a3, align 1
  %b3 = load i32* %a4, align 1
  %b4 = load i32* %a5, align 1
  store i32 %b1, i32* %a7, align 1
  store i32 %b2, i32* %a8, align 1
  store i32 %b3, i32* %a9, align 1
  store i32 %b4, i32* %a10, align 1
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}

; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy.
; The trailing colon keeps this label from also matching the longer
; "...Sext" symbol that shares the same prefix.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
  %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i64* %.0, i64 1
  %3 = load i64* %.0, align 1
  %4 = getelementptr inbounds i8* %c, i64 %3
  %5 = load i8* %4, align 1
  %6 = add i64 %3, 1
  %7 = getelementptr inbounds i8* %c, i64 %6
  %8 = load i8* %7, align 1
  store i8 %5, i8* %.08, align 1
  %9 = getelementptr inbounds i8* %.08, i64 1
  store i8 %8, i8* %9, align 1
  %10 = getelementptr inbounds i8* %.08, i64 2
  %11 = add nsw i32 %.09, -1
  %12 = icmp eq i32 %11, 0
  br i1 %12, label %13, label %1

; <label>:13
  ret void
}

; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy even if there are intermediate sign
; extensions.
; The index is sign-extended once (%4) and both addresses are computed from
; that extended value (%4 and %4 + 1), so the two byte loads are consecutive.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i8* %.0, i64 1
  %3 = load i8* %.0, align 1
  %4 = sext i8 %3 to i64
  %5 = getelementptr inbounds i8* %c, i64 %4
  %6 = load i8* %5, align 1
  %7 = add i64 %4, 1
  %8 = getelementptr inbounds i8* %c, i64 %7
  %9 = load i8* %8, align 1
  store i8 %6, i8* %.08, align 1
  %10 = getelementptr inbounds i8* %.08, i64 1
  store i8 %9, i8* %10, align 1
  %11 = getelementptr inbounds i8* %.08, i64 2
  %12 = add nsw i32 %.09, -1
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %14, label %1

; <label>:14
  ret void
}

; However, we can only ignore sign extensions when they are on all memory
; computations. Here the "+ 1" is done in i8 before the sign extension
; (%7, %wrap.4), so it may wrap and the two loads need not be adjacent.
; The negative patterns use inline register regexes rather than a captured
; variable: a variable defined inside a negative pattern is never bound, and
; a use would silently refer to the capture from the previous function.
; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
; CHECK-NOT: movw (%{{.*}},%{{.*}}), %{{[a-z]+}}
; CHECK-NOT: movw %{{[a-z]+}}, (%{{.*}})
define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i8* %.0, i64 1
  %3 = load i8* %.0, align 1
  %4 = sext i8 %3 to i64
  %5 = getelementptr inbounds i8* %c, i64 %4
  %6 = load i8* %5, align 1
  %7 = add i8 %3, 1
  %wrap.4 = sext i8 %7 to i64
  %8 = getelementptr inbounds i8* %c, i64 %wrap.4
  %9 = load i8* %8, align 1
  store i8 %6, i8* %.08, align 1
  %10 = getelementptr inbounds i8* %.08, i64 1
  store i8 %9, i8* %10, align 1
  %11 = getelementptr inbounds i8* %.08, i64 2
  %12 = add nsw i32 %.09, -1
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %14, label %1

; <label>:14
  ret void
}