Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr | FileCheck %s -check-prefix=WIN_X64
      2 ; RUN: llc < %s -mtriple=x86_64-pc-linux         | FileCheck %s -check-prefix=LINUX
      3 
      4 ; By default, Windows CoreCLR requires an inline prologue stack expansion check
      5 ; if 4096 bytes or more are allocated on the stack.
      6 
      7 ; Prolog stack allocation >= 4096 bytes will require the probe sequence
      8 define i32 @main4k() nounwind {
      9 entry:
        ; Windows CoreCLR expectations. The directives below are matched in
        ; order, but not necessarily on adjacent output lines:
        ;   1. %eax receives the 4096-byte allocation size; %rcx and %rdx are
        ;      spilled to 8(%rsp) and 16(%rsp) so they can serve as scratch.
        ;   2. %rdx = %rsp - %rax, replaced by zero (taken from %rcx) when the
        ;      subtraction borrows.
        ;   3. %rdx is compared with the value loaded from %gs:16 (presumably the
        ;      thread's current stack limit -- confirm); jae skips the probe loop
        ;      when %rdx is not below that value.
        ;   4. Otherwise %rdx is rounded down to a 4096-byte boundary and the loop
        ;      steps %rcx down by 4096, storing a zero byte at each step, until
        ;      %rcx equals %rdx.
        ;   5. %rcx and %rdx are reloaded, %rsp is lowered by %rax, and the
        ;      epilogue adds 4096 back before returning 0 in %eax.
     10 ; WIN_X64-LABEL:main4k:
     11 ; WIN_X64: # BB#0:
     12 ; WIN_X64:      movl    $4096, %eax
     13 ; WIN_X64:      movq    %rcx, 8(%rsp)
     14 ; WIN_X64:	movq	%rdx, 16(%rsp)
     15 ; WIN_X64:	xorq	%rcx, %rcx
     16 ; WIN_X64:	movq	%rsp, %rdx
     17 ; WIN_X64:	subq	%rax, %rdx
     18 ; WIN_X64:	cmovbq	%rcx, %rdx
     19 ; WIN_X64:	movq	%gs:16, %rcx
     20 ; WIN_X64:	cmpq	%rcx, %rdx
     21 ; WIN_X64:	jae	.LBB0_3
     22 ; WIN_X64:# BB#1:
     23 ; WIN_X64:	andq	$-4096, %rdx
     24 ; WIN_X64:.LBB0_2:
     25 ; WIN_X64:	leaq	-4096(%rcx), %rcx
     26 ; WIN_X64:	movb	$0, (%rcx)
     27 ; WIN_X64:	cmpq	%rcx, %rdx
     28 ; WIN_X64:	jne	.LBB0_2
     29 ; WIN_X64:.LBB0_3:
     30 ; WIN_X64:	movq	8(%rsp), %rcx
     31 ; WIN_X64:	movq	16(%rsp), %rdx
     32 ; WIN_X64:	subq	%rax, %rsp
     33 ; WIN_X64:	xorl	%eax, %eax
     34 ; WIN_X64:	addq	$4096, %rsp
     35 ; WIN_X64:	retq
        ; On Linux no load from %gs:16 may appear between the function label and
        ; its return.
     36 ; LINUX-LABEL:main4k:
     37 ; LINUX-NOT:    movq    %gs:16, %rcx
     38 ; LINUX: 	retq
        ; A single 4096-byte local is the whole frame; the result is always 0.
     39   %a = alloca [4096 x i8]
     40   ret i32 0
     41 }
     42 
     43 ; Prolog stack allocation >= 4096 bytes will require the probe sequence
     44 ; Case with frame pointer
     45 define i32 @main4k_frame() nounwind "no-frame-pointer-elim"="true" {
     46 entry:
        ; "no-frame-pointer-elim"="true" keeps the frame pointer. Windows CoreCLR
        ; must still spill %rcx and then load %gs:16. The spill slot checked here
        ; is 16(%rsp), where the frameless main4k uses 8(%rsp) (presumably
        ; because %rbp has already been pushed -- confirm against llc output).
     47 ; WIN_X64-LABEL:main4k_frame:
     48 ; WIN_X64:      movq    %rcx,   16(%rsp)
     49 ; WIN_X64:      movq    %gs:16, %rcx
        ; On Linux no load from %gs:16 may appear between the function label and
        ; its return.
     50 ; LINUX-LABEL:main4k_frame:
     51 ; LINUX-NOT:    movq    %gs:16, %rcx
     52 ; LINUX: 	retq
        ; Same 4096-byte local as main4k; only the frame-pointer attribute differs.
     53   %a = alloca [4096 x i8]
     54   ret i32 0
     55 }
     56 
     57 ; Prolog stack allocation >= 4096 bytes will require the probe sequence
     58 ; Case with INT args
     59 define i32 @main4k_intargs(i32 %x, i32 %y) nounwind {
     60 entry:
        ; Both prefixes are anchored to this function's label so the checks below
        ; cannot be satisfied by output belonging to a neighbouring function.
        ; Windows CoreCLR must spill %rcx to 8(%rsp) before using it to hold the
        ; value loaded from %gs:16.
        ; WIN_X64-LABEL:main4k_intargs:
     61 ; WIN_X64:      movq    %rcx,   8(%rsp)
     62 ; WIN_X64:      movq    %gs:16, %rcx
        ; On Linux no load from %gs:16 may appear between the label and the return.
        ; LINUX-LABEL:main4k_intargs:
     63 ; LINUX-NOT:    movq    %gs:16, %rcx
     64 ; LINUX: 	retq
        ; 4096-byte local plus a use of both integer arguments, so the arguments
        ; are live across the prologue.
     65   %a = alloca [4096 x i8]
     66   %t = add i32 %x, %y
     67   ret i32 %t
     68 }
     69 
     70 ; Prolog stack allocation >= 4096 bytes will require the probe sequence
     71 ; Case with FP regs
     72 define i32 @main4k_fpargs(double %x, double %y) nounwind {
     73 entry:
        ; Both prefixes are anchored to this function's label so the checks below
        ; cannot be satisfied by output belonging to a neighbouring function.
        ; Even with only floating-point arguments, Windows CoreCLR must spill
        ; %rcx to 8(%rsp) before loading %gs:16 into it.
        ; WIN_X64-LABEL:main4k_fpargs:
     74 ; WIN_X64:      movq    %rcx,   8(%rsp)
     75 ; WIN_X64:      movq    %gs:16, %rcx
        ; On Linux no load from %gs:16 may appear between the label and the return.
        ; LINUX-LABEL:main4k_fpargs:
     76 ; LINUX-NOT:    movq    %gs:16, %rcx
     77 ; LINUX: 	retq
        ; 4096-byte local; neither argument is used and the result is always 0.
     78   %a = alloca [4096 x i8]
     79   ret i32 0
     80 }
     81 
     82 ; Prolog stack allocation >= 4096 bytes will require the probe sequence
     83 ; Case with mixed regs
     84 define i32 @main4k_mixargs(double %x, i32 %y) nounwind {
     85 entry:
        ; Both prefixes are anchored to this function's label so the checks below
        ; cannot be satisfied by output belonging to a neighbouring function.
        ; With one floating-point and one integer argument, Windows CoreCLR must
        ; still load %gs:16 as part of the probe.
        ; WIN_X64-LABEL:main4k_mixargs:
     86 ; WIN_X64:      movq    %gs:16, %rcx
        ; On Linux no load from %gs:16 may appear between the label and the return.
        ; LINUX-LABEL:main4k_mixargs:
     87 ; LINUX-NOT:    movq    %gs:16, %rcx
     88 ; LINUX: 	retq
        ; 4096-byte local; the integer argument is returned, so it is live across
        ; the prologue.
     89   %a = alloca [4096 x i8]
     90   ret i32 %y
     91 }
     92 
     93 ; Make sure we don't emit the probe for a smaller prolog stack allocation.
     94 define i32 @main128() nounwind {
     95 entry:
        ; The label anchors confine each negative check to the region between this
        ; function's label and its own return, so the absence of the %gs:16 load
        ; is verified for main128 itself rather than for whatever precedes it in
        ; the output.
        ; WIN_X64-LABEL:main128:
     96 ; WIN_X64-NOT:  movq    %gs:16, %rcx
     97 ; WIN_X64:      retq
        ; LINUX-LABEL:main128:
     98 ; LINUX-NOT:    movq    %gs:16, %rcx
     99 ; LINUX: 	retq
        ; A 128-byte local is below the probe threshold on both targets.
    100   %a = alloca [128 x i8]
    101   ret i32 0
    102 }
    103 
    104 ; Make sure we don't emit the probe sequence if not on Windows even if the
    105 ; function uses the Win64 calling convention.
    106 define x86_64_win64cc i32 @main4k_win64() nounwind {
    107 entry:
        ; x86_64_win64cc puts the Win64 calling convention on this function for
        ; every target. Only the Windows CoreCLR triple is expected to load
        ; %gs:16; the label anchor keeps that match inside this function.
        ; WIN_X64-LABEL:main4k_win64:
    108 ; WIN_X64:      movq    %gs:16, %rcx
        ; On Linux the calling convention alone must not trigger the probe: no
        ; load from %gs:16 may appear between the label and the return.
        ; LINUX-LABEL:main4k_win64:
    109 ; LINUX-NOT:    movq    %gs:16, %rcx
    110 ; LINUX: 	retq
        ; 4096-byte local; the result is always 0.
    111   %a = alloca [4096 x i8]
    112   ret i32 0
    113 }
    114 
    115 declare i32 @bar(i8*) nounwind ; external callee; main4k_alloca passes each of its allocas to it
    116 
    117 ; Within-body inline probe expansion
    118 define x86_64_win64cc i32 @main4k_alloca(i64 %n) nounwind {
    119 entry:
        ; Both prefixes are anchored to this function's label so the first
        ; "callq bar" match cannot land in a neighbouring function.
        ; On Windows CoreCLR a load from %gs:16 into some %r register is expected
        ; between the two calls to bar, i.e. in the function body rather than in
        ; the prologue.
        ; WIN_X64-LABEL:main4k_alloca:
    120 ; WIN_X64: 	callq	bar
    121 ; WIN_X64:  	movq	%gs:16, [[R:%r.*]]
    122 ; WIN_X64: 	callq	bar
        ; On Linux no such load may appear between the two calls.
        ; LINUX-LABEL:main4k_alloca:
    123 ; LINUX: 	callq	bar
    124 ; LINUX-NOT:  	movq	%gs:16, [[R:%r.*]]
    125 ; LINUX: 	callq	bar
        ; %a has a constant size of 1024 bytes; %b is sized at run time by %n.
        ; Each is handed to bar, and the two results are summed.
    126   %a = alloca i8, i64 1024
    127   %ra = call i32 @bar(i8* %a) nounwind
    128   %b = alloca i8, i64 %n
    129   %rb = call i32 @bar(i8* %b) nounwind
    130   %r = add i32 %ra, %rb
    131   ret i32 %r
    132 }
    133 
    134 ; Influence of stack-probe-size attribute
    135 ; Note this is not exposed in coreclr
    136 define i32 @test_probe_size() "stack-probe-size"="8192" nounwind {
        ; "stack-probe-size"="8192" is set on this function, so its 4096-byte
        ; local is expected not to produce a probe even on Windows CoreCLR.
        ; The label anchors confine each negative check to the region between
        ; this function's label and its own return.
        ; WIN_X64-LABEL:test_probe_size:
    137 ; WIN_X64-NOT:  movq    %gs:16, %rcx
    138 ; WIN_X64: 	retq
        ; LINUX-LABEL:test_probe_size:
    139 ; LINUX-NOT:    movq    %gs:16, %rcx
    140 ; LINUX: 	retq
    141   %a = alloca [4096 x i8]
    142   ret i32 0
    143 }
    144