; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST

; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST

; Every function in this file carries attribute #0 ("+retpoline"), so the
; backend must route all indirect calls/jumps through __llvm_retpoline_*
; thunks.  The --implicit-check-not patterns above additionally reject any
; raw indirect "jmp *"/"call *" anywhere in the asm output, at both default
; opt level and -O0 (the *FAST prefixes), for x86-64 and i686.

declare void @bar(i32)

; Test a simple indirect call and tail call.
; Direct calls to @bar must remain plain calls; the calls through %fp must
; move the target into r11 (x86-64) / eax (i686) and call the thunk, with
; the final indirect tail call becoming a tail jump to the thunk on x86-64.
define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
entry:
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  ret void
}

; X64-LABEL: icall_reg:
; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
; X64-DAG: movl %esi, %[[x:[^ ]*]]
; X64: movl %esi, %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: callq __llvm_retpoline_r11
; X64: movl %[[x]], %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_reg:
; X64FAST: callq bar
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: callq bar
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: icall_reg:
; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_retpoline_eax
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_retpoline_eax
; X86-NOT: # TAILCALL

; X86FAST-LABEL: icall_reg:
; X86FAST: calll bar
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: calll bar
; X86FAST: calll __llvm_retpoline_eax


@global_fp = external global void (i32)*

; Test an indirect call through a global variable.
define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
  %fp1 = load void (i32)*, void (i32)** @global_fp
  call void %fp1(i32 %x)
  %fp2 = load void (i32)*, void (i32)** @global_fp
  tail call void %fp2(i32 %x)
  ret void
}

; X64-LABEL: icall_global_fp:
; X64-DAG: movl %edi, %[[x:[^ ]*]]
; X64-DAG: movq global_fp(%rip), %r11
; X64: callq __llvm_retpoline_r11
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq global_fp(%rip), %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_global_fp:
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: icall_global_fp:
; X86: movl global_fp, %eax
; X86: pushl 4(%esp)
; X86: calll __llvm_retpoline_eax
; X86: addl $4, %esp
; X86: movl global_fp, %eax
; X86: jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: icall_global_fp:
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL


%struct.Foo = type { void (%struct.Foo*)** }

; Test an indirect call through a vtable.
define void @vcall(%struct.Foo* %obj) #0 {
  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
  tail call void %fp(%struct.Foo* %obj)
  tail call void %fp(%struct.Foo* %obj)
  ret void
}

; X64-LABEL: vcall:
; X64: movq %rdi, %[[obj:[^ ]*]]
; X64: movq (%rdi), %[[vptr:[^ ]*]]
; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
; X64: movq %[[fp]], %r11
; X64: callq __llvm_retpoline_r11
; X64-DAG: movq %[[obj]], %rdi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: vcall:
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: vcall:
; X86: movl 8(%esp), %[[obj:[^ ]*]]
; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]
; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
; X86: movl %[[fp]], %eax
; X86: pushl %[[obj]]
; X86: calll __llvm_retpoline_eax
; X86: addl $4, %esp
; X86: movl %[[fp]], %eax
; X86: jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: vcall:
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL


declare void @direct_callee()

; A direct tail call needs no thunk, even with retpolines enabled; it must
; stay a plain tail jump to the callee symbol.
define void @direct_tail() #0 {
  tail call void @direct_callee()
  ret void
}

; X64-LABEL: direct_tail:
; X64: jmp direct_callee # TAILCALL
; X64FAST-LABEL: direct_tail:
; X64FAST: jmp direct_callee # TAILCALL
; X86-LABEL: direct_tail:
; X86: jmp direct_callee # TAILCALL
; X86FAST-LABEL: direct_tail:
; X86FAST: jmp direct_callee # TAILCALL


declare void @nonlazybind_callee() #1

; nonlazybind callees are reached through a pointer loaded from the GOT on
; x86-64 (see the GOTPCREL checks below), so that loaded pointer must also
; be routed through the retpoline thunk; on i686 the checks expect direct
; calls/jumps via the PLT instead, which need no thunk.
define void @nonlazybind_caller() #0 {
  call void @nonlazybind_callee()
  tail call void @nonlazybind_callee()
  ret void
}

; X64-LABEL: nonlazybind_caller:
; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
; X64: movq %[[REG]], %r11
; X64: callq __llvm_retpoline_r11
; X64: movq %[[REG]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL
; X64FAST-LABEL: nonlazybind_caller:
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; X86-LABEL: nonlazybind_caller:
; X86: calll nonlazybind_callee@PLT
; X86: jmp nonlazybind_callee@PLT # TAILCALL
; X86FAST-LABEL: nonlazybind_caller:
; X86FAST: calll nonlazybind_callee@PLT
; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL


@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb1),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb2),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb3),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb4),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb5),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb6),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb7),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb8),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb9)]

; Check that when retpolines are enabled a function with indirectbr gets
; rewritten to use switch, and that in turn doesn't get lowered as a jump
; table.
define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
; X64-LABEL: indirectbr_rewrite:
; X64-NOT: jmpq
; X86-LABEL: indirectbr_rewrite:
; X86-NOT: jmpl
entry:
  %i0 = load i64, i64* %p
  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
  %target0 = load i8*, i8** %target.i0
  indirectbr i8* %target0, [label %bb1, label %bb3]

bb0:
  store volatile i64 0, i64* %sink
  br label %latch

bb1:
  store volatile i64 1, i64* %sink
  br label %latch

bb2:
  store volatile i64 2, i64* %sink
  br label %latch

bb3:
  store volatile i64 3, i64* %sink
  br label %latch

bb4:
  store volatile i64 4, i64* %sink
  br label %latch

bb5:
  store volatile i64 5, i64* %sink
  br label %latch

bb6:
  store volatile i64 6, i64* %sink
  br label %latch

bb7:
  store volatile i64 7, i64* %sink
  br label %latch

bb8:
  store volatile i64 8, i64* %sink
  br label %latch

bb9:
  store volatile i64 9, i64* %sink
  br label %latch

latch:
  %i.next = load i64, i64* %p
  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
  %target.next = load i8*, i8** %target.i.next
  ; Potentially hit a full 10 successors here so that even if we rewrite as
  ; a switch it will try to be lowered with a jump table.
  indirectbr i8* %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}

; Lastly check that the necessary thunks were emitted.
;
; Thunk shape (same for every register variant below): a call pushes the
; real target's address slot onto the stack, a pause/lfence loop captures
; any speculative execution at the return site, and the call target block
; overwrites the pushed return address with the register holding the real
; destination before ret'ing to it.  Each thunk lives in its own comdat
; section and is weak + hidden so multiple TUs can emit it.
;
; X64-LABEL: .section .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
; X64-NEXT: .hidden __llvm_retpoline_r11
; X64-NEXT: .weak __llvm_retpoline_r11
; X64: __llvm_retpoline_r11:
; X64-NEXT: # {{.*}} # %entry
; X64-NEXT: callq [[CALL_TARGET:.*]]
; X64-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X64-NEXT: # %entry
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: pause
; X64-NEXT: lfence
; X64-NEXT: jmp [[CAPTURE_SPEC]]
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: [[CALL_TARGET]]: # Block address taken
; X64-NEXT: # %entry
; X64-NEXT: movq %r11, (%rsp)
; X64-NEXT: retq
;
; X86-LABEL: .section .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
; X86-NEXT: .hidden __llvm_retpoline_eax
; X86-NEXT: .weak __llvm_retpoline_eax
; X86: __llvm_retpoline_eax:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: retl
;
; X86-LABEL: .section .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
; X86-NEXT: .hidden __llvm_retpoline_ecx
; X86-NEXT: .weak __llvm_retpoline_ecx
; X86: __llvm_retpoline_ecx:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %ecx, (%esp)
; X86-NEXT: retl
;
; X86-LABEL: .section .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
; X86-NEXT: .hidden __llvm_retpoline_edx
; X86-NEXT: .weak __llvm_retpoline_edx
; X86: __llvm_retpoline_edx:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %edx, (%esp)
; X86-NEXT: retl
;
; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
; X86-NEXT: .hidden __llvm_retpoline_edi
; X86-NEXT: .weak __llvm_retpoline_edi
; X86: __llvm_retpoline_edi:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %edi, (%esp)
; X86-NEXT: retl


attributes #0 = { "target-features"="+retpoline" }
attributes #1 = { nonlazybind }