; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr | FileCheck %s -check-prefix=WIN_X64
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=LINUX

; By default, windows CoreCLR requires an inline prologue stack expansion check
; if 4096 bytes or more are allocated on the stack.

; Prolog stack allocation >= 4096 bytes will require the probe sequence
;
; The Windows checks below pin the whole inline sequence: rcx and rdx are
; spilled to 8(%rsp) and 16(%rsp), the prospective stack pointer is computed
; in rdx (forced to zero if the subtraction borrows), and it is compared with
; the value loaded from %gs:16. When it is below that value, one byte is
; written per 4096-byte step down to the 4096-aligned target. rcx and rdx are
; then reloaded and rsp is adjusted.
define i32 @main4k() nounwind {
entry:
; WIN_X64-LABEL:main4k:
; WIN_X64: # BB#0:
; WIN_X64: movl $4096, %eax
; WIN_X64: movq %rcx, 8(%rsp)
; WIN_X64: movq %rdx, 16(%rsp)
; WIN_X64: xorq %rcx, %rcx
; WIN_X64: movq %rsp, %rdx
; WIN_X64: subq %rax, %rdx
; WIN_X64: cmovbq %rcx, %rdx
; WIN_X64: movq %gs:16, %rcx
; WIN_X64: cmpq %rcx, %rdx
; WIN_X64: jae .LBB0_3
; WIN_X64:# BB#1:
; WIN_X64: andq $-4096, %rdx
; WIN_X64:.LBB0_2:
; WIN_X64: leaq -4096(%rcx), %rcx
; WIN_X64: movb $0, (%rcx)
; WIN_X64: cmpq %rcx, %rdx
; WIN_X64: jne .LBB0_2
; WIN_X64:.LBB0_3:
; WIN_X64: movq 8(%rsp), %rcx
; WIN_X64: movq 16(%rsp), %rdx
; WIN_X64: subq %rax, %rsp
; WIN_X64: xorl %eax, %eax
; WIN_X64: addq $4096, %rsp
; WIN_X64: retq
; LINUX-LABEL:main4k:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with frame pointer
define i32 @main4k_frame() nounwind "no-frame-pointer-elim"="true" {
entry:
; WIN_X64-LABEL:main4k_frame:
; WIN_X64: movq %rcx, 16(%rsp)
; WIN_X64: movq %gs:16, %rcx
; LINUX-LABEL:main4k_frame:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with INT args
define i32 @main4k_intargs(i32 %x, i32 %y) nounwind {
entry:
; WIN_X64-LABEL:main4k_intargs:
; WIN_X64: movq %rcx, 8(%rsp)
; WIN_X64: movq %gs:16, %rcx
; LINUX-LABEL:main4k_intargs:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
  %a = alloca [4096 x i8]
  %t = add i32 %x, %y
  ret i32 %t
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with FP regs
define i32 @main4k_fpargs(double %x, double %y) nounwind {
entry:
; WIN_X64-LABEL:main4k_fpargs:
; WIN_X64: movq %rcx, 8(%rsp)
; WIN_X64: movq %gs:16, %rcx
; LINUX-LABEL:main4k_fpargs:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with mixed regs
define i32 @main4k_mixargs(double %x, i32 %y) nounwind {
entry:
; WIN_X64-LABEL:main4k_mixargs:
; WIN_X64: movq %gs:16, %rcx
; LINUX-LABEL:main4k_mixargs:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
  %a = alloca [4096 x i8]
  ret i32 %y
}

; Make sure we don't emit the probe for a smaller prolog stack allocation.
; The label checks confine the negative checks to this function's own output;
; without them the negative check would only cover the tail of the previous
; function, up to its return.
define i32 @main128() nounwind {
entry:
; WIN_X64-LABEL:main128:
; WIN_X64-NOT: movq %gs:16, %rcx
; WIN_X64: retq
; LINUX-LABEL:main128:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
  %a = alloca [128 x i8]
  ret i32 0
}

; Make sure we don't emit the probe sequence if not on windows even if the
; function uses the Win64 calling convention.
define x86_64_win64cc i32 @main4k_win64() nounwind {
entry:
; The 4096-byte allocation is expected to be probed for the CoreCLR triple
; and left unprobed for the Linux triple.
; WIN_X64-LABEL:main4k_win64:
; WIN_X64: movq %gs:16, %rcx
; LINUX-LABEL:main4k_win64:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
  %a = alloca [4096 x i8]
  ret i32 0
}

declare i32 @bar(i8*) nounwind

; Within-body inline probe expansion
; The first alloca has a constant size (1024 bytes); the second is sized by
; %n. The probe load from %gs:16 is expected between the two calls to bar for
; the CoreCLR triple only.
define x86_64_win64cc i32 @main4k_alloca(i64 %n) nounwind {
entry:
; WIN_X64-LABEL:main4k_alloca:
; WIN_X64: callq bar
; WIN_X64: movq %gs:16, [[R:%r.*]]
; WIN_X64: callq bar
; LINUX-LABEL:main4k_alloca:
; LINUX: callq bar
; LINUX-NOT: movq %gs:16, [[R:%r.*]]
; LINUX: callq bar
  %a = alloca i8, i64 1024
  %ra = call i32 @bar(i8* %a) nounwind
  %b = alloca i8, i64 %n
  %rb = call i32 @bar(i8* %b) nounwind
  %r = add i32 %ra, %rb
  ret i32 %r
}

; Influence of stack-probe-size attribute
; Note this is not exposed in coreclr
; With the probe size raised to 8192, the 4096-byte allocation is expected to
; be emitted without a probe. The label checks confine the negative checks to
; this function's own output rather than the tail of the previous function.
define i32 @test_probe_size() "stack-probe-size"="8192" nounwind {
; WIN_X64-LABEL:test_probe_size:
; WIN_X64-NOT: movq %gs:16, %rcx
; WIN_X64: retq
; LINUX-LABEL:test_probe_size:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
  %a = alloca [4096 x i8]
  ret i32 0
}