; RUN: llc -verify-machineinstrs < %s -enable-tail-merge=0 -mtriple=x86_64-linux | FileCheck %s --check-prefix=LINUX
; RUN: llc -verify-machineinstrs < %s -enable-tail-merge=0 -mtriple=x86_64-linux-gnux32 | FileCheck %s --check-prefix=LINUX-X32
; RUN: llc -verify-machineinstrs < %s -enable-tail-merge=0 -mtriple=x86_64-windows | FileCheck %s --check-prefix=WINDOWS
; RUN: llc -verify-machineinstrs < %s -enable-tail-merge=0 -mtriple=i686-windows | FileCheck %s --check-prefix=X86
; RUN: llc -verify-machineinstrs < %s -enable-tail-merge=0 -mtriple=i686-windows -mattr=+sse2 | FileCheck %s --check-prefix=X86

; Verify that every argument in the variadic pack is spilled and reloaded when
; a thunk has to make an ordinary call before forwarding. The ordinary call
; clobbers all argument registers, so they must be preserved around it. A thunk
; that only adjusts its arguments should need no XMM spills at all.

declare void @llvm.va_start(i8*) nounwind

declare void(i8*, ...)* @get_f(i8* %this)

; f_thunk: look up the real target with a normal call to get_f, then forward
; the complete, unmodified argument list to it with a musttail call.
define void @f_thunk(i8* %this, ...) {
  ; Also call va_start, so the test covers va_start combined with a
  ; forwarding musttail call in the same function.
  %va_area = alloca [4 x i8*], align 16
  %va_area_raw = bitcast [4 x i8*]* %va_area to i8*
  call void @llvm.va_start(i8* %va_area_raw)

  ; This ordinary call is what forces the argument registers to be saved.
  %target = call void(i8*, ...)*(i8*) @get_f(i8* %this)
  musttail call void (i8*, ...) %target(i8* %this, ...)
  ret void
}

; On the Linux targets: 6 GPRs, 8 XMM registers, and AL must be saved before
; the call to get_f and restored after it.

; SysV x86-64: all six integer argument registers, AL, and xmm0-xmm7 are
; stored before the call to get_f and reloaded before the tail jump.
; LINUX-LABEL: f_thunk:
; LINUX-DAG: movq %rdi, {{.*}}
; LINUX-DAG: movq %rsi, {{.*}}
; LINUX-DAG: movq %rdx, {{.*}}
; LINUX-DAG: movq %rcx, {{.*}}
; LINUX-DAG: movq %r8, {{.*}}
; LINUX-DAG: movq %r9, {{.*}}
; LINUX-DAG: movb %al, {{.*}}
; LINUX-DAG: movaps %xmm0, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm1, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm2, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm3, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm4, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm5, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm6, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm7, {{[0-9]*}}(%rsp)
; LINUX: callq get_f
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm0
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm1
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm2
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm3
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm4
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm5
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm6
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm7
; LINUX-DAG: movq {{.*}}, %rdi
; LINUX-DAG: movq {{.*}}, %rsi
; LINUX-DAG: movq {{.*}}, %rdx
; LINUX-DAG: movq {{.*}}, %rcx
; LINUX-DAG: movq {{.*}}, %r8
; LINUX-DAG: movq {{.*}}, %r9
; LINUX-DAG: movb {{.*}}, %al
; LINUX: jmpq *{{.*}} # TAILCALL

; x32 variant of the same expectations: the pointer argument is moved as a
; 32-bit value through %edi and the XMM slots are addressed off %esp.
; LINUX-X32-LABEL: f_thunk:
; LINUX-X32-DAG: movl %edi, {{.*}}
; LINUX-X32-DAG: movq %rsi, {{.*}}
; LINUX-X32-DAG: movq %rdx, {{.*}}
; LINUX-X32-DAG: movq %rcx, {{.*}}
; LINUX-X32-DAG: movq %r8, {{.*}}
; LINUX-X32-DAG: movq %r9, {{.*}}
; LINUX-X32-DAG: movb %al, {{.*}}
; LINUX-X32-DAG: movaps %xmm0, {{[0-9]*}}(%esp)
; LINUX-X32-DAG: movaps %xmm1, {{[0-9]*}}(%esp)
; LINUX-X32-DAG: movaps %xmm2, {{[0-9]*}}(%esp)
; LINUX-X32-DAG: movaps %xmm3, {{[0-9]*}}(%esp)
; LINUX-X32-DAG: movaps %xmm4, {{[0-9]*}}(%esp)
; LINUX-X32-DAG: movaps %xmm5, {{[0-9]*}}(%esp)
; LINUX-X32-DAG: movaps %xmm6, {{[0-9]*}}(%esp)
; LINUX-X32-DAG: movaps %xmm7, {{[0-9]*}}(%esp)
; LINUX-X32: callq get_f
; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm0
; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm1
; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm2
; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm3
; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm4
; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm5
; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm6
; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm7
; LINUX-X32-DAG: movl {{.*}}, %edi
; LINUX-X32-DAG: movq {{.*}}, %rsi
; LINUX-X32-DAG: movq {{.*}}, %rdx
; LINUX-X32-DAG: movq {{.*}}, %rcx
; LINUX-X32-DAG: movq {{.*}}, %r8
; LINUX-X32-DAG: movq {{.*}}, %r9
; LINUX-X32-DAG: movb {{.*}}, %al
; LINUX-X32: jmpq *{{.*}} # TAILCALL

; Win64: only the four integer argument registers are saved and restored, and
; no mov?ps instruction may appear anywhere between the label and the tail jump.
; WINDOWS-LABEL: f_thunk:
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS-DAG: movq %rdx, {{.*}}
; WINDOWS-DAG: movq %rcx, {{.*}}
; WINDOWS-DAG: movq %r8, {{.*}}
; WINDOWS-DAG: movq %r9, {{.*}}
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS: callq get_f
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS-DAG: movq {{.*}}, %rdx
; WINDOWS-DAG: movq {{.*}}, %rcx
; WINDOWS-DAG: movq {{.*}}, %r8
; WINDOWS-DAG: movq {{.*}}, %r9
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS: jmpq *{{.*}} # TAILCALL

; No regparms on normal x86 conventions.

; X86-LABEL: _f_thunk:
; X86: calll _get_f
; X86: jmpl *{{.*}} # TAILCALL

; This thunk shouldn't require any spills and reloads, assuming the register
; allocator knows what it's doing.

; g_thunk: the first argument is itself the callee; it is cast to a variadic
; function pointer and tail-called with the unmodified argument list.
define void @g_thunk(i8* %fptr_i8, ...) {
  %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
  musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...)
  ret void
}

; LINUX-LABEL: g_thunk:
; LINUX-NOT: movq
; LINUX: jmpq *%rdi # TAILCALL

; On x32 the 32-bit pointer in %edi is first copied into another register
; (captured as REG) and the jump goes through that register.
; LINUX-X32-LABEL: g_thunk:
; LINUX-X32-DAG: movl %edi, %[[REG:e[abcd]x|ebp|esi|edi|r8|r9|r1[0-5]]]
; LINUX-X32-DAG: jmpq *%[[REG]] # TAILCALL

; WINDOWS-LABEL: g_thunk:
; WINDOWS-NOT: movq
; WINDOWS: jmpq *%rcx # TAILCALL

; 32-bit x86: no frame-pointer push and no stack realignment before the jump.
; llc prints AT&T mnemonics with a size suffix, so the push pattern has to be
; spelled "pushl"; a bare "push %ebp" could never match the output.
; X86-LABEL: _g_thunk:
; X86-NOT: pushl %ebp
; X86-NOT: andl {{.*}}, %esp
; X86: jmpl *%eax # TAILCALL

; Do a simple multi-exit multi-bb test.

%struct.Foo = type { i1, i8*, i8* }

@g = external global i32

; h_thunk: branch on the i1 in field 0 of %this, then forward all arguments to
; the function pointer stored in field 1 (then) or field 2 (else). The else
; path also stores 42 to @g before the tail call.
define void @h_thunk(%struct.Foo* %this, ...) {
  %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
  %cond = load i1, i1* %cond_p
  br i1 %cond, label %then, label %else

then:
  %a_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 1
  %a_i8 = load i8*, i8** %a_p
  %a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
  musttail call void (%struct.Foo*, ...) %a(%struct.Foo* %this, ...)
  ret void

else:
  %b_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 2
  %b_i8 = load i8*, i8** %b_p
  %b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
  store i32 42, i32* @g
  musttail call void (%struct.Foo*, ...) %b(%struct.Foo* %this, ...)
  ret void
}

; Every target must emit one conditional branch followed by two separate
; indirect tail jumps, one per exit block.
; LINUX-LABEL: h_thunk:
; LINUX: jne
; LINUX: jmpq *{{.*}} # TAILCALL
; LINUX: jmpq *{{.*}} # TAILCALL
; LINUX-X32-LABEL: h_thunk:
; LINUX-X32: jne
; LINUX-X32: jmpq *{{.*}} # TAILCALL
; LINUX-X32: jmpq *{{.*}} # TAILCALL
; WINDOWS-LABEL: h_thunk:
; WINDOWS: jne
; WINDOWS: jmpq *{{.*}} # TAILCALL
; WINDOWS: jmpq *{{.*}} # TAILCALL
; X86-LABEL: _h_thunk:
; X86: jne
; X86: jmpl *{{.*}} # TAILCALL
; X86: jmpl *{{.*}} # TAILCALL