; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
; RUN: llc < %s -mtriple=i686-windows -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH
; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=i686-windows -stackrealign -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX

; Tests for the x86 call-frame optimization that converts stores of call
; arguments into the reserved call frame (movl $N, off(%esp)) into pushl
; instructions. The NORMAL prefix covers the default 32-bit behavior, the
; NOPUSH prefix covers the opt-out flag, X64 checks the 64-bit register
; calling convention is left alone, ALIGNED covers over-aligned stacks,
; and LINUX covers CFI emission.

%class.Class = type { i32 }
%struct.s = type { i64 }

declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
declare x86_thiscallcc void @thiscall(%class.Class* %class, i32 %a, i32 %b, i32 %c, i32 %d)
declare void @oneparam(i32 %a)
declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
declare void @struct(%struct.s* byval %a, i32 %b, i32 %c, i32 %d)
declare void @inalloca(<{ %struct.s }>* inalloca)

declare i8* @llvm.stacksave()
declare void @llvm.stackrestore(i8*)

; We should get pushes for x86, even though there is a reserved call frame.
; Make sure we don't touch x86-64, and that turning it off works.
; NORMAL-LABEL: test1:
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
; NORMAL-NEXT: addl $16, %esp
; X64-LABEL: test1:
; X64: movl $1, %ecx
; X64-NEXT: movl $2, %edx
; X64-NEXT: movl $3, %r8d
; X64-NEXT: movl $4, %r9d
; X64-NEXT: callq good
; NOPUSH-LABEL: test1:
; NOPUSH: subl $16, %esp
; NOPUSH-NEXT: movl $4, 12(%esp)
; NOPUSH-NEXT: movl $3, 8(%esp)
; NOPUSH-NEXT: movl $2, 4(%esp)
; NOPUSH-NEXT: movl $1, (%esp)
; NOPUSH-NEXT: call
; NOPUSH-NEXT: addl $16, %esp
define void @test1() {
entry:
  call void @good(i32 1, i32 2, i32 3, i32 4)
  ret void
}

; If we have a reserved frame, we should have pushes
; (the variable-sized alloca below prevents a fixed reserved call frame,
; but the pushes must still be generated)
; NORMAL-LABEL: test2:
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
define void @test2(i32 %k) {
entry:
  %a = alloca i32, i32 %k
  call void @good(i32 1, i32 2, i32 3, i32 4)
  ret void
}

; Again, we expect a sequence of 4 immediate pushes
; Checks that we generate the right pushes for >8bit immediates
; NORMAL-LABEL: test2b:
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4096
; NORMAL-NEXT: pushl $3072
; NORMAL-NEXT: pushl $2048
; NORMAL-NEXT: pushl $1024
; NORMAL-NEXT: call
; NORMAL-NEXT: addl $16, %esp
define void @test2b() optsize {
entry:
  call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
  ret void
}

; The first push should push a register
; (%f = %k + 1 is computed into a register, so the last push emitted —
; for the first argument — is a register push rather than an immediate)
; NORMAL-LABEL: test3:
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl %e{{..}}
; NORMAL-NEXT: call
; NORMAL-NEXT: addl $16, %esp
define void @test3(i32 %k) optsize {
entry:
  %f = add i32 %k, 1
  call void @good(i32 %f, i32 2, i32 3, i32 4)
  ret void
}

; We support weird calling conventions
; (%b is marked inreg, so it is materialized in %eax and only the three
; remaining arguments are pushed — hence addl $12 after the call)
; NORMAL-LABEL: test4:
; NORMAL: movl $2, %eax
; NORMAL-NEXT: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
; NORMAL-NEXT: addl $12, %esp
define void @test4() optsize {
entry:
  call void @inreg(i32 1, i32 2, i32 3, i32 4)
  ret void
}

; thiscall passes the object pointer in %ecx and the callee pops its own
; stack arguments, so no stack cleanup follows the call.
; NORMAL-LABEL: test4b:
; NORMAL: movl 4(%esp), %ecx
; NORMAL-NEXT: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
; NORMAL-NEXT: ret
define void @test4b(%class.Class* %f) optsize {
entry:
  call x86_thiscallcc void @thiscall(%class.Class* %f, i32 1, i32 2, i32 3, i32 4)
  ret void
}

; When there is no reserved call frame, check that additional alignment
; is added when the pushes don't add up to the required alignment.
; ALIGNED-LABEL: test5:
; ALIGNED: subl $16, %esp
; ALIGNED-NEXT: pushl $4
; ALIGNED-NEXT: pushl $3
; ALIGNED-NEXT: pushl $2
; ALIGNED-NEXT: pushl $1
; ALIGNED-NEXT: call
define void @test5(i32 %k) {
entry:
  %a = alloca i32, i32 %k
  call void @good(i32 1, i32 2, i32 3, i32 4)
  ret void
}

; When the alignment adds up, do the transformation
; (8 pushes x 4 bytes = 32 bytes, matching -stack-alignment=32)
; ALIGNED-LABEL: test5b:
; ALIGNED: pushl $8
; ALIGNED-NEXT: pushl $7
; ALIGNED-NEXT: pushl $6
; ALIGNED-NEXT: pushl $5
; ALIGNED-NEXT: pushl $4
; ALIGNED-NEXT: pushl $3
; ALIGNED-NEXT: pushl $2
; ALIGNED-NEXT: pushl $1
; ALIGNED-NEXT: call
define void @test5b() optsize {
entry:
  call void @eightparams(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8)
  ret void
}

; When having to compensate for the alignment isn't worth it,
; don't use pushes.
; ALIGNED-LABEL: test5c:
; ALIGNED: movl $1, (%esp)
; ALIGNED-NEXT: call
define void @test5c() optsize {
entry:
  call void @oneparam(i32 1)
  ret void
}

; Check that pushing the addresses of globals (Or generally, things that
; aren't exactly immediates) isn't broken.
; Fixes PR21878.
; NORMAL-LABEL: test6:
; NORMAL: pushl $_ext
; NORMAL-NEXT: call
declare void @f(i8*)
@ext = external constant i8

define void @test6() {
  call void @f(i8* @ext)
  br label %bb
bb:
  alloca i32
  ret void
}

; Check that we fold simple cases into the push
; NORMAL-LABEL: test7:
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: movl 4(%esp), [[EAX:%e..]]
; NORMAL-NEXT: pushl $4
; NORMAL-NEXT: pushl ([[EAX]])
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
; NORMAL-NEXT: addl $16, %esp
define void @test7(i32* %ptr) optsize {
entry:
  %val = load i32, i32* %ptr
  call void @good(i32 1, i32 2, i32 %val, i32 4)
  ret void
}

; Fold stack-relative loads into the push, with correct offset
; In particular, at the second push, %b was at 12(%esp) and
; %a was at 8(%esp), but the second push bumped %esp, so %a
; is now at 12(%esp)
; NORMAL-LABEL: test8:
; NORMAL: pushl $4
; NORMAL-NEXT: pushl 12(%esp)
; NORMAL-NEXT: pushl 12(%esp)
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
; NORMAL-NEXT: addl $16, %esp
define void @test8(i32 %a, i32 %b) optsize {
entry:
  call void @good(i32 1, i32 %a, i32 %b, i32 4)
  ret void
}

; If one function is using push instructions, and the other isn't
; (because it has frame-index references), then we must resolve
; these references correctly.
; The first call uses pushes; the second (byval %struct.s plus pointers to
; the local allocas) uses frame-index-based mov stores, whose %esp offsets
; must account for the adjustments made by the first call's pushes.
; NORMAL-LABEL: test9:
; NORMAL-NOT: leal (%esp),
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
; NORMAL-NEXT: subl $4, %esp
; NORMAL-NEXT: movl 20(%esp), [[E1:%e..]]
; NORMAL-NEXT: movl 24(%esp), [[E2:%e..]]
; NORMAL-NEXT: movl [[E2]], 4(%esp)
; NORMAL-NEXT: movl [[E1]], (%esp)
; NORMAL-NEXT: leal 32(%esp), [[E3:%e..]]
; NORMAL-NEXT: movl [[E3]], 16(%esp)
; NORMAL-NEXT: leal 28(%esp), [[E4:%e..]]
; NORMAL-NEXT: movl [[E4]], 12(%esp)
; NORMAL-NEXT: movl $6, 8(%esp)
; NORMAL-NEXT: call
; NORMAL-NEXT: addl $20, %esp
define void @test9() optsize {
entry:
  %p = alloca i32, align 4
  %q = alloca i32, align 4
  %s = alloca %struct.s, align 4
  call void @good(i32 1, i32 2, i32 3, i32 4)
  %pv = ptrtoint i32* %p to i32
  %qv = ptrtoint i32* %q to i32
  call void @struct(%struct.s* byval %s, i32 6, i32 %qv, i32 %pv)
  ret void
}

; We can end up with an indirect call which gets reloaded on the spot.
; Make sure we reference the correct stack slot - we spill into (%esp)
; and reload from 16(%esp) due to the pushes.
; The inline asm below clobbers every GPR, so @good's address (loaded from
; the stack slot) must be spilled to (%esp) and the call emitted as an
; indirect call through that slot — at offset 16 once the 4 pushes are in.
; NORMAL-LABEL: test10:
; NORMAL: movl $_good, [[ALLOC:.*]]
; NORMAL-NEXT: movl [[ALLOC]], [[EAX:%e..]]
; NORMAL-NEXT: movl [[EAX]], (%esp) # 4-byte Spill
; NORMAL: nop
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: calll *16(%esp)
; NORMAL-NEXT: addl $24, %esp
define void @test10() optsize {
  %stack_fptr = alloca void (i32, i32, i32, i32)*
  store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
  %good_ptr = load volatile void (i32, i32, i32, i32)*, void (i32, i32, i32, i32)** %stack_fptr
  call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
  call void (i32, i32, i32, i32) %good_ptr(i32 1, i32 2, i32 3, i32 4)
  ret void
}

; We can't fold the load from the global into the push because of
; interference from the store
; NORMAL-LABEL: test11:
; NORMAL: movl _the_global, [[EAX:%e..]]
; NORMAL-NEXT: movl $42, _the_global
; NORMAL-NEXT: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl [[EAX]]
; NORMAL-NEXT: call
; NORMAL-NEXT: addl $16, %esp
@the_global = external global i32
define void @test11() optsize {
  %myload = load i32, i32* @the_global
  store i32 42, i32* @the_global
  call void @good(i32 %myload, i32 2, i32 3, i32 4)
  ret void
}

; Converting one mov into a push isn't worth it when
; doing so forces too much overhead for other calls.
; test12: two @struct calls surround one @good call; converting the single
; @good call to pushes would break the reserved frame the @struct calls
; rely on, so @good's arguments stay as mov stores.
; NORMAL-LABEL: test12:
; NORMAL: movl $8, 12(%esp)
; NORMAL-NEXT: movl $7, 8(%esp)
; NORMAL-NEXT: movl $6, 4(%esp)
; NORMAL-NEXT: movl $5, (%esp)
; NORMAL-NEXT: calll _good
define void @test12() optsize {
entry:
  %s = alloca %struct.s, align 4
  call void @struct(%struct.s* %s, i32 2, i32 3, i32 4)
  call void @good(i32 5, i32 6, i32 7, i32 8)
  call void @struct(%struct.s* %s, i32 10, i32 11, i32 12)
  ret void
}

; But if the gains outweigh the overhead, we should do it
; (two of the three calls become push sequences here)
; NORMAL-LABEL: test12b:
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: calll _good
; NORMAL-NEXT: subl $4, %esp
; NORMAL: movl $8, 16(%esp)
; NORMAL-NEXT: movl $7, 12(%esp)
; NORMAL-NEXT: movl $6, 8(%esp)
; NORMAL-NEXT: calll _struct
; NORMAL-NEXT: addl $20, %esp
; NORMAL-NEXT: pushl $12
; NORMAL-NEXT: pushl $11
; NORMAL-NEXT: pushl $10
; NORMAL-NEXT: pushl $9
; NORMAL-NEXT: calll _good
; NORMAL-NEXT: addl $16, %esp
define void @test12b() optsize {
entry:
  %s = alloca %struct.s, align 4
  call void @good(i32 1, i32 2, i32 3, i32 4)
  call void @struct(%struct.s* %s, i32 6, i32 7, i32 8)
  call void @good(i32 9, i32 10, i32 11, i32 12)
  ret void
}

; Make sure the add does not prevent folding loads into pushes.
; val1 and val2 will not be folded into pushes since they have
; an additional use, but val3 should be.
; (the third check line below deliberately matches only ", <reg>" — the
; destination of the add — so it accepts whichever instruction llc uses
; to materialize the sum)
; NORMAL-LABEL: test13:
; NORMAL: movl ([[P1:%e..]]), [[V1:%e..]]
; NORMAL-NEXT: movl ([[P2:%e..]]), [[V2:%e..]]
; NORMAL-NEXT: , [[ADD:%e..]]
; NORMAL-NEXT: pushl [[ADD]]
; NORMAL-NEXT: pushl ([[P3:%e..]])
; NORMAL-NEXT: pushl [[V2]]
; NORMAL-NEXT: pushl [[V1]]
; NORMAL-NEXT: calll _good
; NORMAL: movl [[P3]], %eax
define i32* @test13(i32* inreg %ptr1, i32* inreg %ptr2, i32* inreg %ptr3) optsize {
entry:
  %val1 = load i32, i32* %ptr1
  %val2 = load i32, i32* %ptr2
  %val3 = load i32, i32* %ptr3
  %add = add i32 %val1, %val2
  call void @good(i32 %val1, i32 %val2, i32 %val3, i32 %add)
  ret i32* %ptr3
}

; Make sure to fold adjacent stack adjustments.
; LINUX-LABEL: pr27140:
; LINUX: subl $12, %esp
; LINUX: .cfi_def_cfa_offset 16
; LINUX-NOT: sub
; LINUX: pushl $4
; LINUX: .cfi_adjust_cfa_offset 4
; LINUX: pushl $3
; LINUX: .cfi_adjust_cfa_offset 4
; LINUX: pushl $2
; LINUX: .cfi_adjust_cfa_offset 4
; LINUX: pushl $1
; LINUX: .cfi_adjust_cfa_offset 4
; LINUX: calll good
; LINUX: addl $28, %esp
; LINUX: .cfi_adjust_cfa_offset -16
; LINUX-NOT: add
; LINUX: retl
define void @pr27140() optsize {
entry:
  tail call void @good(i32 1, i32 2, i32 3, i32 4)
  ret void
}

; Check that a stack restore (leal -4(%ebp), %esp) doesn't get merged with a
; stack adjustment (addl $12, %esp). Just because it's a lea doesn't mean it's
; simply decreasing the stack pointer.
; NORMAL-LABEL: test14:
; NORMAL: calll _B_func
; NORMAL: leal -4(%ebp), %esp
; NORMAL-NOT: %esp
; NORMAL: retl
%struct.A = type { i32, i32 }
%struct.B = type { i8 }
declare x86_thiscallcc %struct.B* @B_ctor(%struct.B* returned, %struct.A* byval)
declare void @B_func(%struct.B* sret, %struct.B*, i32)
define void @test14(%struct.A* %a) {
entry:
  %ref.tmp = alloca %struct.B, align 1
  %agg.tmp = alloca i64, align 4
  ; View the i64 temporary as a %struct.A so it can be passed byval to the
  ; thiscall constructor below (the byval copy perturbs %esp around the call).
  %tmpcast = bitcast i64* %agg.tmp to %struct.A*
  %tmp = alloca %struct.B, align 1
  ; Copy *%a into the temporary as a single i64.
  %0 = bitcast %struct.A* %a to i64*
  %1 = load i64, i64* %0, align 4
  store i64 %1, i64* %agg.tmp, align 4
  %call = call x86_thiscallcc %struct.B* @B_ctor(%struct.B* %ref.tmp, %struct.A* byval %tmpcast)
  %2 = getelementptr inbounds %struct.B, %struct.B* %tmp, i32 0, i32 0
  call void @B_func(%struct.B* sret %tmp, %struct.B* %ref.tmp, i32 1)
  ret void
}