Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
      2 ; RUN: llc < %s -mtriple=i686-windows -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH
      3 ; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
      4 ; RUN: llc < %s -mtriple=i686-windows -stackrealign -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
      5 ; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX
      6 
      7 %class.Class = type { i32 }
      8 %struct.s = type { i64 }
      9 
     10 declare void @good(i32 %a, i32 %b, i32 %c, i32 %d) ; four plain i32 parameters, default calling convention
     11 declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d) ; like @good, but the second parameter is marked inreg
     12 declare x86_thiscallcc void @thiscall(%class.Class* %class, i32 %a, i32 %b, i32 %c, i32 %d) ; thiscall convention: object pointer plus four i32 parameters
     13 declare void @oneparam(i32 %a) ; a single i32 parameter
     14 declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) ; eight i32 parameters
     15 declare void @struct(%struct.s* byval %a, i32 %b, i32 %c, i32 %d) ; first parameter is a byval %struct.s, followed by three i32 parameters
     16 declare void @inalloca(<{ %struct.s }>* inalloca) ; takes an inalloca argument block; not called in the visible part of this file
     17 
     18 declare i8* @llvm.stacksave()
     19 declare void @llvm.stackrestore(i8*)
     20 
     21 ; We should get pushes for x86, even though there is a reserved call frame.
     22 ; Make sure we don't touch x86-64, and that turning it off works.
     23 ; NORMAL-LABEL: test1:
     24 ; NORMAL-NOT: subl {{.*}} %esp
     25 ; NORMAL: pushl   $4
     26 ; NORMAL-NEXT: pushl   $3
     27 ; NORMAL-NEXT: pushl   $2
     28 ; NORMAL-NEXT: pushl   $1
     29 ; NORMAL-NEXT: call
     30 ; NORMAL-NEXT: addl $16, %esp
     31 ; X64-LABEL: test1:
     32 ; X64: movl    $1, %ecx
     33 ; X64-NEXT: movl    $2, %edx
     34 ; X64-NEXT: movl    $3, %r8d
     35 ; X64-NEXT: movl    $4, %r9d
     36 ; X64-NEXT: callq   good
     37 ; NOPUSH-LABEL: test1:
     38 ; NOPUSH: subl    $16, %esp
     39 ; NOPUSH-NEXT: movl    $4, 12(%esp)
     40 ; NOPUSH-NEXT: movl    $3, 8(%esp)
     41 ; NOPUSH-NEXT: movl    $2, 4(%esp)
     42 ; NOPUSH-NEXT: movl    $1, (%esp)
     43 ; NOPUSH-NEXT: call
     44 ; NOPUSH-NEXT: addl $16, %esp
     45 define void @test1() {
     46 entry:
          ; The only work in this function is one call to @good with four
          ; constant i32 arguments. There is no optsize attribute here.
     47   call void @good(i32 1, i32 2, i32 3, i32 4)
     48   ret void
     49 }
     50 
     51 ; If we don't have a reserved frame (variable-sized alloca), we should still have pushes
     52 ; NORMAL-LABEL: test2:
     53 ; NORMAL-NOT: subl {{.*}} %esp
     54 ; NORMAL: pushl   $4
     55 ; NORMAL-NEXT: pushl   $3
     56 ; NORMAL-NEXT: pushl   $2
     57 ; NORMAL-NEXT: pushl   $1
     58 ; NORMAL-NEXT: call
     59 define void @test2(i32 %k) {
     60 entry:
          ; %a is never read or written; it is an alloca whose element count is
          ; the runtime value %k, so the frame contains a variable-sized object.
          ; NOTE(review): presumably this is what changes the call-frame
          ; handling relative to test1 -- confirm against X86 frame lowering.
     61   %a = alloca i32, i32 %k
     62   call void @good(i32 1, i32 2, i32 3, i32 4)
     63   ret void
     64 }
     65 
     66 ; Again, we expect a sequence of 4 immediate pushes
     67 ; Checks that we generate the right pushes for >8bit immediates
     68 ; NORMAL-LABEL: test2b:
     69 ; NORMAL-NOT: subl {{.*}} %esp
     70 ; NORMAL: pushl   $4096
     71 ; NORMAL-NEXT: pushl   $3072
     72 ; NORMAL-NEXT: pushl   $2048
     73 ; NORMAL-NEXT: pushl   $1024
     74 ; NORMAL-NEXT: call
     75 ; NORMAL-NEXT: addl $16, %esp
     76 define void @test2b() optsize {
     77 entry:
          ; Same shape as test1, but the function is optsize and every constant
          ; (1024, 2048, 3072, 4096) is too large to fit in 8 bits.
     78   call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
     79   ret void
     80 }
     81 
     82 ; The first argument should be pushed from a register (it is the last push emitted)
     83 ; NORMAL-LABEL: test3:
     84 ; NORMAL-NOT: subl {{.*}} %esp
     85 ; NORMAL: pushl   $4
     86 ; NORMAL-NEXT: pushl   $3
     87 ; NORMAL-NEXT: pushl   $2
     88 ; NORMAL-NEXT: pushl   %e{{..}}
     89 ; NORMAL-NEXT: call
     90 ; NORMAL-NEXT: addl $16, %esp
     91 define void @test3(i32 %k) optsize {
     92 entry:
          ; %f = %k + 1 is a computed (non-constant) value. It is passed as the
          ; first argument; the remaining three arguments are constants.
     93   %f = add i32 %k, 1
     94   call void @good(i32 %f, i32 2, i32 3, i32 4)
     95   ret void
     96 }
     97 
     98 ; We support weird calling conventions
     99 ; NORMAL-LABEL: test4:
    100 ; NORMAL: movl    $2, %eax
    101 ; NORMAL-NEXT: pushl   $4
    102 ; NORMAL-NEXT: pushl   $3
    103 ; NORMAL-NEXT: pushl   $1
    104 ; NORMAL-NEXT: call
    105 ; NORMAL-NEXT: addl $12, %esp
    106 define void @test4() optsize {
    107 entry:
          ; @inreg declares its second parameter as inreg, so the constant 2 is
          ; the value bound to that parameter; 1, 3 and 4 go to the ordinary
          ; parameters.
    108   call void @inreg(i32 1, i32 2, i32 3, i32 4)
    109   ret void
    110 }
    111 
    112 ; NORMAL-LABEL: test4b:
    113 ; NORMAL: movl 4(%esp), %ecx
    114 ; NORMAL-NEXT: pushl   $4
    115 ; NORMAL-NEXT: pushl   $3
    116 ; NORMAL-NEXT: pushl   $2
    117 ; NORMAL-NEXT: pushl   $1
    118 ; NORMAL-NEXT: call
    119 ; NORMAL-NEXT: ret
    120 define void @test4b(%class.Class* %f) optsize {
    121 entry:
          ; Call using the x86_thiscallcc convention: the incoming pointer %f is
          ; forwarded as the object pointer, followed by four constant i32
          ; arguments.
    122   call x86_thiscallcc void @thiscall(%class.Class* %f, i32 1, i32 2, i32 3, i32 4)
    123   ret void
    124 }
    125 
    126 ; When there is no reserved call frame, check that additional alignment
    127 ; is added when the pushes don't add up to the required alignment.
    128 ; ALIGNED-LABEL: test5:
    129 ; ALIGNED: subl    $16, %esp
    130 ; ALIGNED-NEXT: pushl   $4
    131 ; ALIGNED-NEXT: pushl   $3
    132 ; ALIGNED-NEXT: pushl   $2
    133 ; ALIGNED-NEXT: pushl   $1
    134 ; ALIGNED-NEXT: call
    135 define void @test5(i32 %k) {
    136 entry:
          ; Same IR body as test2: an unused alloca sized by the runtime value %k,
          ; followed by a call with four constant i32 arguments (16 bytes of
          ; arguments). This function is checked under the 'ALIGNED' prefix, whose
          ; run line requests stack realignment with a 32-byte stack alignment.
    137   %a = alloca i32, i32 %k
    138   call void @good(i32 1, i32 2, i32 3, i32 4)
    139   ret void
    140 }
    141 
    142 ; When the alignment adds up, do the transformation
    143 ; ALIGNED-LABEL: test5b:
    144 ; ALIGNED: pushl   $8
    145 ; ALIGNED-NEXT: pushl   $7
    146 ; ALIGNED-NEXT: pushl   $6
    147 ; ALIGNED-NEXT: pushl   $5
    148 ; ALIGNED-NEXT: pushl   $4
    149 ; ALIGNED-NEXT: pushl   $3
    150 ; ALIGNED-NEXT: pushl   $2
    151 ; ALIGNED-NEXT: pushl   $1
    152 ; ALIGNED-NEXT: call
    153 define void @test5b() optsize {
    154 entry:
          ; Eight constant i32 arguments, i.e. 8 * 4 = 32 bytes of outgoing
          ; arguments, which equals the 32-byte stack alignment requested by the
          ; run line that uses the 'ALIGNED' prefix.
    155   call void @eightparams(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8)
    156   ret void
    157 }
    158 
    159 ; When having to compensate for the alignment isn't worth it,
    160 ; don't use pushes.
    161 ; ALIGNED-LABEL: test5c:
    162 ; ALIGNED: movl $1, (%esp)
    163 ; ALIGNED-NEXT: call
    164 define void @test5c() optsize {
    165 entry:
          ; A single constant i32 argument: only 4 bytes of outgoing arguments.
    166   call void @oneparam(i32 1)
    167   ret void
    168 }
    169 
    170 ; Check that pushing the addresses of globals (or, more generally, things
    171 ; that aren't exactly immediates) isn't broken.
    172 ; Fixes PR21878.
    173 ; NORMAL-LABEL: test6:
    174 ; NORMAL: pushl    $_ext
    175 ; NORMAL-NEXT: call
    176 declare void @f(i8*)
    177 @ext = external constant i8
    178 
    179 define void @test6() {
          ; The argument is the address of the external constant @ext rather than
          ; a literal integer.
    180   call void @f(i8* @ext)
    181   br label %bb
    182 bb:
          ; This alloca is unnamed, unused, and placed outside the entry block.
          ; NOTE(review): presumably it is here so the function has a
          ; non-entry-block (dynamic) alloca -- confirm against PR21878.
    183   alloca i32
    184   ret void
    185 }
    186 
    187 ; Check that we fold simple cases into the push
    188 ; NORMAL-LABEL: test7:
    189 ; NORMAL-NOT: subl {{.*}} %esp
    190 ; NORMAL: movl 4(%esp), [[EAX:%e..]]
    191 ; NORMAL-NEXT: pushl   $4
    192 ; NORMAL-NEXT: pushl   ([[EAX]])
    193 ; NORMAL-NEXT: pushl   $2
    194 ; NORMAL-NEXT: pushl   $1
    195 ; NORMAL-NEXT: call
    196 ; NORMAL-NEXT: addl $16, %esp
    197 define void @test7(i32* %ptr) optsize {
    198 entry:
          ; %val is loaded from %ptr and has exactly one use: the third argument
          ; of the call below. The other three arguments are constants.
    199   %val = load i32, i32* %ptr
    200   call void @good(i32 1, i32 2, i32 %val, i32 4)
    201   ret void
    202 }
    203 
    204 ; Fold stack-relative loads into the push, with the correct offset.
    205 ; In particular, at the second push, %b was at 12(%esp) and
    206 ; %a was at 8(%esp), but the second push bumped %esp, so %a
    207 ; is now at 12(%esp).
    208 ; NORMAL-LABEL: test8:
    209 ; NORMAL: pushl   $4
    210 ; NORMAL-NEXT: pushl   12(%esp)
    211 ; NORMAL-NEXT: pushl   12(%esp)
    212 ; NORMAL-NEXT: pushl   $1
    213 ; NORMAL-NEXT: call
    214 ; NORMAL-NEXT: addl $16, %esp
    215 define void @test8(i32 %a, i32 %b) optsize {
    216 entry:
          ; The incoming parameters %a and %b are forwarded unchanged as the
          ; second and third arguments; the first and fourth are constants.
    217   call void @good(i32 1, i32 %a, i32 %b, i32 4)
    218   ret void
    219 }
    220 
    221 ; If one call is using push instructions, and the other isn't
    222 ; (because it has frame-index references), then we must resolve
    223 ; these references correctly.
    224 ; NORMAL-LABEL: test9:
    225 ; NORMAL-NOT: leal (%esp), 
    226 ; NORMAL: pushl $4
    227 ; NORMAL-NEXT: pushl $3
    228 ; NORMAL-NEXT: pushl $2
    229 ; NORMAL-NEXT: pushl $1
    230 ; NORMAL-NEXT: call
    231 ; NORMAL-NEXT: subl $4, %esp
    232 ; NORMAL-NEXT: movl 20(%esp), [[E1:%e..]]
    233 ; NORMAL-NEXT: movl 24(%esp), [[E2:%e..]]
    234 ; NORMAL-NEXT: movl    [[E2]], 4(%esp)
    235 ; NORMAL-NEXT: movl    [[E1]], (%esp)
    236 ; NORMAL-NEXT: leal 32(%esp), [[E3:%e..]]
    237 ; NORMAL-NEXT: movl    [[E3]], 16(%esp)
    238 ; NORMAL-NEXT: leal 28(%esp), [[E4:%e..]]
    239 ; NORMAL-NEXT: movl    [[E4]], 12(%esp)
    240 ; NORMAL-NEXT: movl    $6, 8(%esp)
    241 ; NORMAL-NEXT: call
    242 ; NORMAL-NEXT: addl $20, %esp
    243 define void @test9() optsize {
    244 entry:
          ; Three fixed-size stack objects: two i32 slots and one %struct.s.
    245   %p = alloca i32, align 4
    246   %q = alloca i32, align 4
    247   %s = alloca %struct.s, align 4  
          ; First call: all four arguments are constants.
    248   call void @good(i32 1, i32 2, i32 3, i32 4)
          ; Second call: the addresses of %p and %q are converted to i32 and
          ; passed by value, and %s is passed byval, so this call's arguments
          ; refer to this function's own stack objects.
    249   %pv = ptrtoint i32* %p to i32
    250   %qv = ptrtoint i32* %q to i32
    251   call void @struct(%struct.s* byval %s, i32 6, i32 %qv, i32 %pv)
    252   ret void
    253 }
    254 
    255 ; We can end up with an indirect call which gets reloaded on the spot.
    256 ; Make sure we reference the correct stack slot - we spill into (%esp)
    257 ; and reload from 16(%esp) due to the pushes.
    258 ; NORMAL-LABEL: test10:
    259 ; NORMAL: movl $_good, [[ALLOC:.*]]
    260 ; NORMAL-NEXT: movl [[ALLOC]], [[EAX:%e..]]
    261 ; NORMAL-NEXT: movl [[EAX]], (%esp) # 4-byte Spill
    262 ; NORMAL: nop
    263 ; NORMAL: pushl $4
    264 ; NORMAL-NEXT: pushl $3
    265 ; NORMAL-NEXT: pushl $2
    266 ; NORMAL-NEXT: pushl $1
    267 ; NORMAL-NEXT: calll *16(%esp)
    268 ; NORMAL-NEXT: addl $24, %esp
    269 define void @test10() optsize {
          ; The address of @good is stored to a stack slot and read back with a
          ; volatile load, so the callee below is an indirect call target.
    270   %stack_fptr = alloca void (i32, i32, i32, i32)*
    271   store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
    272   %good_ptr = load volatile void (i32, i32, i32, i32)*, void (i32, i32, i32, i32)** %stack_fptr
          ; The inline asm emits "nop" and lists ax, bx, cx, dx, bp, si and di
          ; as clobbered, while %good_ptr is still live across it.
    273   call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
          ; Indirect call through the previously loaded pointer with four
          ; constant arguments.
    274   call void (i32, i32, i32, i32) %good_ptr(i32 1, i32 2, i32 3, i32 4)
    275   ret void
    276 }
    277 
    278 ; We can't fold the load from the global into the push because of 
    279 ; interference from the store
    280 ; NORMAL-LABEL: test11:
    281 ; NORMAL: movl    _the_global, [[EAX:%e..]]
    282 ; NORMAL-NEXT: movl    $42, _the_global
    283 ; NORMAL-NEXT: pushl $4
    284 ; NORMAL-NEXT: pushl $3
    285 ; NORMAL-NEXT: pushl $2
    286 ; NORMAL-NEXT: pushl [[EAX]]
    287 ; NORMAL-NEXT: call
    288 ; NORMAL-NEXT: addl $16, %esp
    289 @the_global = external global i32
    290 define void @test11() optsize {
          ; The load of @the_global is followed by a store of 42 to the same
          ; global before the loaded value is used as the first call argument,
          ; so the call must see the value from before the store.
    291   %myload = load i32, i32* @the_global
    292   store i32 42, i32* @the_global
    293   call void @good(i32 %myload, i32 2, i32 3, i32 4)
    294   ret void
    295 }
    296 
    297 ; Converting one mov into a push isn't worth it when 
    298 ; doing so forces too much overhead for other calls.
    299 ; NORMAL-LABEL: test12:
    300 ; NORMAL: movl    $8, 12(%esp)
    301 ; NORMAL-NEXT: movl    $7, 8(%esp)
    302 ; NORMAL-NEXT: movl    $6, 4(%esp)
    303 ; NORMAL-NEXT: movl    $5, (%esp)
    304 ; NORMAL-NEXT: calll _good
    305 define void @test12() optsize {
    306 entry:
    307   %s = alloca %struct.s, align 4  
          ; Call order: @struct, @good, @struct. Only the middle call has
          ; all-constant arguments; the other two pass the address of %s.
          ; Note: these call sites pass %s without the byval attribute that the
          ; declaration of @struct carries on its first parameter.
    308   call void @struct(%struct.s* %s, i32 2, i32 3, i32 4)
    309   call void @good(i32 5, i32 6, i32 7, i32 8)
    310   call void @struct(%struct.s* %s, i32 10, i32 11, i32 12)
    311   ret void
    312 }
    313 
    314 ; But if the gains outweigh the overhead, we should do it
    315 ; NORMAL-LABEL: test12b:
    316 ; NORMAL: pushl    $4
    317 ; NORMAL-NEXT: pushl    $3
    318 ; NORMAL-NEXT: pushl    $2
    319 ; NORMAL-NEXT: pushl    $1
    320 ; NORMAL-NEXT: calll _good
    321 ; NORMAL-NEXT: subl    $4, %esp
    322 ; NORMAL: movl    $8, 16(%esp)
    323 ; NORMAL-NEXT: movl    $7, 12(%esp)
    324 ; NORMAL-NEXT: movl    $6, 8(%esp)
    325 ; NORMAL-NEXT: calll _struct
    326 ; NORMAL-NEXT: addl    $20, %esp
    327 ; NORMAL-NEXT: pushl    $12
    328 ; NORMAL-NEXT: pushl    $11
    329 ; NORMAL-NEXT: pushl    $10
    330 ; NORMAL-NEXT: pushl    $9
    331 ; NORMAL-NEXT: calll _good
    332 ; NORMAL-NEXT: addl $16, %esp
    333 define void @test12b() optsize {
    334 entry:
    335   %s = alloca %struct.s, align 4  
          ; Call order: @good, @struct, @good. Two of the three calls have
          ; all-constant arguments; only the middle one passes the address of %s
          ; (again without byval at the call site).
    336   call void @good(i32 1, i32 2, i32 3, i32 4)  
    337   call void @struct(%struct.s* %s, i32 6, i32 7, i32 8)
    338   call void @good(i32 9, i32 10, i32 11, i32 12)
    339   ret void
    340 }
    341 
    342 ; Make sure the add does not prevent folding loads into pushes.
    343 ; val1 and val2 will not be folded into pushes since they have
    344 ; an additional use, but val3 should be.
    345 ; NORMAL-LABEL: test13:
    346 ; NORMAL: movl ([[P1:%e..]]), [[V1:%e..]]
    347 ; NORMAL-NEXT: movl ([[P2:%e..]]), [[V2:%e..]]
    348 ; NORMAL-NEXT: , [[ADD:%e..]]
    349 ; NORMAL-NEXT: pushl [[ADD]]
    350 ; NORMAL-NEXT: pushl ([[P3:%e..]])
    351 ; NORMAL-NEXT: pushl [[V2]]
    352 ; NORMAL-NEXT: pushl [[V1]]
    353 ; NORMAL-NEXT: calll _good
    354 ; NORMAL: movl [[P3]], %eax
    355 define i32* @test13(i32* inreg %ptr1, i32* inreg %ptr2, i32* inreg %ptr3) optsize {
    356 entry:
          ; All three pointer parameters are marked inreg. Each is loaded once.
    357   %val1 = load i32, i32* %ptr1
    358   %val2 = load i32, i32* %ptr2
    359   %val3 = load i32, i32* %ptr3
          ; %val1 and %val2 are used twice (by this add and by the call);
          ; %val3 is used only by the call.
    360   %add = add i32 %val1, %val2
    361   call void @good(i32 %val1, i32 %val2, i32 %val3, i32 %add)
          ; %ptr3 is returned, so it is still live after the call.
    362   ret i32* %ptr3
    363 }
    364 
    365 ; Make sure to fold adjacent stack adjustments.
    366 ; LINUX-LABEL: pr27140:
    367 ; LINUX: subl    $12, %esp
    368 ; LINUX: .cfi_def_cfa_offset 16
    369 ; LINUX-NOT: sub
    370 ; LINUX: pushl   $4
    371 ; LINUX: .cfi_adjust_cfa_offset 4
    372 ; LINUX: pushl   $3
    373 ; LINUX: .cfi_adjust_cfa_offset 4
    374 ; LINUX: pushl   $2
    375 ; LINUX: .cfi_adjust_cfa_offset 4
    376 ; LINUX: pushl   $1
    377 ; LINUX: .cfi_adjust_cfa_offset 4
    378 ; LINUX: calll   good
    379 ; LINUX: addl    $28, %esp
    380 ; LINUX: .cfi_adjust_cfa_offset -16
    381 ; LINUX-NOT: add
    382 ; LINUX: retl
    383 define void @pr27140() optsize {
    384 entry:
          ; The call carries the 'tail' marker and is immediately followed by the
          ; return. This function is checked only under the 'LINUX' prefix
          ; (i686-pc-linux run line).
    385   tail call void @good(i32 1, i32 2, i32 3, i32 4)
    386   ret void
    387 }
    388 
    389 ; Check that a stack restore (leal -4(%ebp), %esp) doesn't get merged with a
    390 ; stack adjustment (addl $12, %esp). Just because it's a lea doesn't mean it's
    391 ; simply decreasing the stack pointer.
    392 ; NORMAL-LABEL: test14:
    393 ; NORMAL: calll _B_func
    394 ; NORMAL: leal -4(%ebp), %esp
    395 ; NORMAL-NOT: %esp
    396 ; NORMAL: retl
    397 %struct.A = type { i32, i32 }
    398 %struct.B = type { i8 }
    399 declare x86_thiscallcc %struct.B* @B_ctor(%struct.B* returned, %struct.A* byval)
    400 declare void @B_func(%struct.B* sret, %struct.B*, i32)
    401 define void @test14(%struct.A* %a) {
    402 entry:
          ; Stack objects: two one-byte %struct.B temporaries and an i64 slot that
          ; is also viewed as a %struct.A (two i32 fields) through %tmpcast.
    403   %ref.tmp = alloca %struct.B, align 1
    404   %agg.tmp = alloca i64, align 4
    405   %tmpcast = bitcast i64* %agg.tmp to %struct.A*
    406   %tmp = alloca %struct.B, align 1
          ; Copy the 8 bytes that %a points to into %agg.tmp with a single i64
          ; load/store pair.
    407   %0 = bitcast %struct.A* %a to i64*
    408   %1 = load i64, i64* %0, align 4
    409   store i64 %1, i64* %agg.tmp, align 4
          ; thiscall constructor call: %ref.tmp is the object pointer and the
          ; copied %struct.A is passed byval. The returned pointer is unused.
    410   %call = call x86_thiscallcc %struct.B* @B_ctor(%struct.B* %ref.tmp, %struct.A* byval %tmpcast)
          ; %2 has no users in this function.
    411   %2 = getelementptr inbounds %struct.B, %struct.B* %tmp, i32 0, i32 0
          ; Default-convention call with %tmp as the sret result slot, %ref.tmp as
          ; a pointer argument, and the constant 1.
    412   call void @B_func(%struct.B* sret %tmp, %struct.B* %ref.tmp, i32 1)
    413   ret void
    414 }
    415