; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; TLS functions were wrongly modeled and, after fixing that, shrink-wrapping
; cannot help here. To achieve the expected lowering, we need to play
; tricks similar to the AArch64 fast TLS calling convention (r255821).
; Applying tricks on x86-64 similar to r255821.
; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -O0 | FileCheck %s --check-prefix=CHECK-O0
%struct.S = type { i8 }

@sg = internal thread_local global %struct.S zeroinitializer, align 1
@__dso_handle = external global i8
@__tls_guard = internal thread_local unnamed_addr global i1 false
@sum1 = internal thread_local global i32 0, align 4

declare void @_ZN1SC1Ev(%struct.S*)
declare void @_ZN1SD1Ev(%struct.S*)
declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)

; Every GPR should be saved - except rdi, rax, and rsp
; The CHECK prefix (default and -enable-shrink-wrap runs) verifies that none of
; the listed registers is pushed or popped around the calls; the CHECK-O0
; prefix expects the explicit push/pop pairs instead.
; Labels carry a trailing ':' so that they match only the function's label
; line and not, e.g., the preceding .globl directive.
; CHECK-LABEL: _ZTW2sg:
; CHECK-NOT: pushq %r11
; CHECK-NOT: pushq %r10
; CHECK-NOT: pushq %r9
; CHECK-NOT: pushq %r8
; CHECK-NOT: pushq %rsi
; CHECK-NOT: pushq %rdx
; CHECK-NOT: pushq %rcx
; CHECK-NOT: pushq %rbx
; CHECK: callq
; CHECK: jne
; CHECK: callq
; CHECK: tlv_atexit
; CHECK: callq
; CHECK-NOT: popq %rbx
; CHECK-NOT: popq %rcx
; CHECK-NOT: popq %rdx
; CHECK-NOT: popq %rsi
; CHECK-NOT: popq %r8
; CHECK-NOT: popq %r9
; CHECK-NOT: popq %r10
; CHECK-NOT: popq %r11

; CHECK-O0-LABEL: _ZTW2sg:
; CHECK-O0: pushq %r11
; CHECK-O0: pushq %r10
; CHECK-O0: pushq %r9
; CHECK-O0: pushq %r8
; CHECK-O0: pushq %rsi
; CHECK-O0: pushq %rdx
; CHECK-O0: pushq %rcx
; CHECK-O0: callq
; CHECK-O0: jne
; CHECK-O0: callq
; CHECK-O0: tlv_atexit
; CHECK-O0: callq
; CHECK-O0: popq %rcx
; CHECK-O0: popq %rdx
; CHECK-O0: popq %rsi
; CHECK-O0: popq %r8
; CHECK-O0: popq %r9
; CHECK-O0: popq %r10
; CHECK-O0: popq %r11
define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
  ; Skip initialization when the guard flag is already set.
  %.b.i = load i1, i1* @__tls_guard, align 1
  br i1 %.b.i, label %__tls_init.exit, label %init.i

init.i:
  ; Set the guard, run the constructor on @sg, and register the destructor
  ; through _tlv_atexit.
  store i1 true, i1* @__tls_guard, align 1
  tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2
  %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2
  br label %__tls_init.exit

__tls_init.exit:
  ret %struct.S* @sg
}

; A wrapper that only returns the address of a TLS variable: no register
; pushes are expected under either prefix, and at -O0 no movq copies of the
; listed registers either.
; CHECK-LABEL: _ZTW4sum1:
; CHECK-NOT: pushq %r11
; CHECK-NOT: pushq %r10
; CHECK-NOT: pushq %r9
; CHECK-NOT: pushq %r8
; CHECK-NOT: pushq %rsi
; CHECK-NOT: pushq %rdx
; CHECK-NOT: pushq %rcx
; CHECK-NOT: pushq %rbx
; CHECK: callq
; CHECK-O0-LABEL: _ZTW4sum1:
; CHECK-O0-NOT: pushq %r11
; CHECK-O0-NOT: pushq %r10
; CHECK-O0-NOT: pushq %r9
; CHECK-O0-NOT: pushq %r8
; CHECK-O0-NOT: pushq %rsi
; CHECK-O0-NOT: pushq %rdx
; CHECK-O0-NOT: pushq %rcx
; CHECK-O0-NOT: pushq %rbx
; CHECK-O0-NOT: movq %r11
; CHECK-O0-NOT: movq %r10
; CHECK-O0-NOT: movq %r9
; CHECK-O0-NOT: movq %r8
; CHECK-O0-NOT: movq %rsi
; CHECK-O0-NOT: movq %rdx
; CHECK-O0-NOT: movq %rcx
; CHECK-O0-NOT: movq %rbx
; CHECK-O0: callq
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
  ret i32* @sum1
}

; Make sure at O0 we don't overwrite RBP.
; CHECK-O0-LABEL: _ZTW4sum2:
; CHECK-O0: pushq %rbp
; CHECK-O0: movq %rsp, %rbp
; CHECK-O0-NOT: movq %r{{.*}}, (%rbp)
define cxx_fast_tlscc i32* @_ZTW4sum2() #0 {
  ret i32* @sum1
}

; Make sure at O0, we don't generate spilling/reloading of the CSRs.
; CHECK-O0-LABEL: tls_test2:
; CHECK-O0-NOT: pushq %r11
; CHECK-O0-NOT: pushq %r10
; CHECK-O0-NOT: pushq %r9
; CHECK-O0-NOT: pushq %r8
; CHECK-O0-NOT: pushq %rsi
; CHECK-O0-NOT: pushq %rdx
; CHECK-O0: callq {{.*}}tls_helper
; CHECK-O0-NOT: popq %rdx
; CHECK-O0-NOT: popq %rsi
; CHECK-O0-NOT: popq %r8
; CHECK-O0-NOT: popq %r9
; CHECK-O0-NOT: popq %r10
; CHECK-O0-NOT: popq %r11
; CHECK-O0: ret
%class.C = type { i32 }
@tC = internal thread_local global %class.C zeroinitializer, align 4
declare cxx_fast_tlscc void @tls_helper()
; Caller and callee both use cxx_fast_tlscc here.
define cxx_fast_tlscc %class.C* @tls_test2() #1 {
  call cxx_fast_tlscc void @tls_helper()
  ret %class.C* @tC
}

; Make sure we do not allow tail call when caller and callee have different
; calling conventions.
declare %class.C* @_ZN1CD1Ev(%class.C* readnone returned %this)
; The label ends in ':' so that it cannot match the earlier tls_test2 symbol,
; of which "tls_test" is a prefix.
; CHECK-LABEL: tls_test:
; CHECK: callq {{.*}}tlv_atexit
define cxx_fast_tlscc void @tls_test() {
entry:
  store i32 0, i32* getelementptr inbounds (%class.C, %class.C* @tC, i64 0, i32 0), align 4
  ; The callee uses the default calling convention while the caller is
  ; cxx_fast_tlscc, so a real call (callq) is expected despite the 'tail' marker.
  %0 = tail call i32 @_tlv_atexit(void (i8*)* bitcast (%class.C* (%class.C*)* @_ZN1CD1Ev to void (i8*)*), i8* bitcast (%class.C* @tC to i8*), i8* nonnull @__dso_handle) #1
  ret void
}

@ssp_var = internal thread_local global i8 0, align 1

; Same no-push expectations as the other optimized wrappers, for a function
; carrying the sspreq attribute (attribute group #2).
; CHECK-LABEL: test_ssp:
; CHECK-NOT: pushq %r11
; CHECK-NOT: pushq %r10
; CHECK-NOT: pushq %r9
; CHECK-NOT: pushq %r8
; CHECK-NOT: pushq %rsi
; CHECK-NOT: pushq %rdx
; CHECK-NOT: pushq %rcx
; CHECK-NOT: pushq %rbx
; CHECK: callq
define cxx_fast_tlscc nonnull i8* @test_ssp() #2 {
  ret i8* @ssp_var
}

attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
attributes #1 = { nounwind }
attributes #2 = { nounwind sspreq }