Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
      2 ; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
      3 
      4 ; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
      5 ; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
        ;
        ; The four llc invocations above cover the x86_64 and i686 triples, each at
        ; the default optimization level and at -O0. Every configuration is verified
        ; under its own FileCheck prefix (X64, X64FAST, X86 and X86FAST).
        ;
        ; The two implicit-check-not patterns apply to the llc output of every
        ; configuration: a line matching "jmp.*\*" or "call.*\*" (a jmp or call
        ; followed by a '*', which is how an indirect branch target is written in
        ; the AT&T syntax used by the checks) fails the test unless an explicit
        ; check consumes it.
      6 
      7 declare void @bar(i32)
      8 
      9 ; Test a simple indirect call and tail call.
        ;
        ; @bar is only declared in this file and serves as the direct-call target.
        ; The body alternates a direct call to @bar with an indirect call through
        ; the %fp argument, twice over; every call is marked `tail` and the second
        ; indirect call is the last instruction before the return.
        ;
        ; Attribute group #0 (defined at the end of the file) turns on the retpoline
        ; target feature for this function. The checks that follow expect both
        ; indirect calls to go through a __llvm_retpoline_* thunk. Under the two
        ; 64-bit prefixes the final one must be a tail-call jmp to the thunk; under
        ; the optimized 32-bit prefix it is a calll and no TAILCALL marker may
        ; follow it.
     10 define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
     11 entry:
          ; Direct call to the declared function @bar.
     12   tail call void @bar(i32 %x)
          ; Indirect call through the function-pointer argument.
     13   tail call void %fp(i32 %x)
          ; Same direct/indirect pair again; %x is passed to all four calls.
     14   tail call void @bar(i32 %x)
          ; Final call, immediately followed by the return.
     15   tail call void %fp(i32 %x)
     16   ret void
     17 }
     18 
     19 ; X64-LABEL: icall_reg:
     20 ; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
     21 ; X64-DAG:   movl %esi, %[[x:[^ ]*]]
     22 ; X64:       movl %esi, %edi
     23 ; X64:       callq bar
     24 ; X64-DAG:   movl %[[x]], %edi
     25 ; X64-DAG:   movq %[[fp]], %r11
     26 ; X64:       callq __llvm_retpoline_r11
     27 ; X64:       movl %[[x]], %edi
     28 ; X64:       callq bar
     29 ; X64-DAG:   movl %[[x]], %edi
     30 ; X64-DAG:   movq %[[fp]], %r11
     31 ; X64:       jmp __llvm_retpoline_r11 # TAILCALL
     32 
     33 ; X64FAST-LABEL: icall_reg:
     34 ; X64FAST:       callq bar
     35 ; X64FAST:       callq __llvm_retpoline_r11
     36 ; X64FAST:       callq bar
     37 ; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL
     38 
     39 ; X86-LABEL: icall_reg:
     40 ; X86-DAG:   movl 12(%esp), %[[fp:[^ ]*]]
     41 ; X86-DAG:   movl 16(%esp), %[[x:[^ ]*]]
     42 ; X86:       pushl %[[x]]
     43 ; X86:       calll bar
     44 ; X86:       movl %[[fp]], %eax
     45 ; X86:       pushl %[[x]]
     46 ; X86:       calll __llvm_retpoline_eax
     47 ; X86:       pushl %[[x]]
     48 ; X86:       calll bar
     49 ; X86:       movl %[[fp]], %eax
     50 ; X86:       pushl %[[x]]
     51 ; X86:       calll __llvm_retpoline_eax
     52 ; X86-NOT:   # TAILCALL
     53 
     54 ; X86FAST-LABEL: icall_reg:
     55 ; X86FAST:       calll bar
     56 ; X86FAST:       calll __llvm_retpoline_eax
     57 ; X86FAST:       calll bar
     58 ; X86FAST:       calll __llvm_retpoline_eax
     59 
     60 
     61 @global_fp = external global void (i32)*
     62 
     63 ; Test an indirect call through a global variable.
        ;
        ; @global_fp is an external global holding a function pointer. It is loaded
        ; separately before each of the two calls; the first call is a plain call
        ; and the second is marked `tail`. The %fpp parameter is not used by the
        ; body.
        ;
        ; The checks that follow expect the call to go through the retpoline thunk
        ; and the tail call to become a jmp to the thunk in all four configurations.
     64 define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
          ; First load of the global function pointer, used by a non-tail call.
     65   %fp1 = load void (i32)*, void (i32)** @global_fp
     66   call void %fp1(i32 %x)
          ; The pointer is loaded from @global_fp again instead of reusing %fp1.
     67   %fp2 = load void (i32)*, void (i32)** @global_fp
     68   tail call void %fp2(i32 %x)
     69   ret void
     70 }
     71 
     72 ; X64-LABEL: icall_global_fp:
     73 ; X64-DAG:   movl %edi, %[[x:[^ ]*]]
     74 ; X64-DAG:   movq global_fp(%rip), %r11
     75 ; X64:       callq __llvm_retpoline_r11
     76 ; X64-DAG:   movl %[[x]], %edi
     77 ; X64-DAG:   movq global_fp(%rip), %r11
     78 ; X64:       jmp __llvm_retpoline_r11 # TAILCALL
     79 
     80 ; X64FAST-LABEL: icall_global_fp:
     81 ; X64FAST:       movq global_fp(%rip), %r11
     82 ; X64FAST:       callq __llvm_retpoline_r11
     83 ; X64FAST:       movq global_fp(%rip), %r11
     84 ; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL
     85 
     86 ; X86-LABEL: icall_global_fp:
     87 ; X86:       movl global_fp, %eax
     88 ; X86:       pushl 4(%esp)
     89 ; X86:       calll __llvm_retpoline_eax
     90 ; X86:       addl $4, %esp
     91 ; X86:       movl global_fp, %eax
     92 ; X86:       jmp __llvm_retpoline_eax # TAILCALL
     93 
     94 ; X86FAST-LABEL: icall_global_fp:
     95 ; X86FAST:       calll __llvm_retpoline_eax
     96 ; X86FAST:       jmp __llvm_retpoline_eax # TAILCALL
     97 
     98 
     99 %struct.Foo = type { void (%struct.Foo*)** }
    100 
    101 ; Test an indirect call through a vtable.
        ;
        ; The only field of %struct.Foo is a pointer to an array of function
        ; pointers, each taking a %struct.Foo*. The function loads that pointer
        ; from %obj, loads the function pointer stored at index 1 of the array, and
        ; calls it twice with %obj as the argument. Both calls are marked `tail`;
        ; the second is the last instruction before the return.
        ;
        ; The checks that follow expect the first call to go through the retpoline
        ; thunk and the second to become a tail-call jmp to the thunk.
    102 define void @vcall(%struct.Foo* %obj) #0 {
          ; Address of field 0 of %obj, i.e. the vtable pointer.
    103   %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
    104   %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
          ; Address of entry 1 of the vtable (one pointer past its start).
    105   %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
          ; The function pointer is loaded once and reused for both calls.
    106   %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
    107   tail call void %fp(%struct.Foo* %obj)
    108   tail call void %fp(%struct.Foo* %obj)
    109   ret void
    110 }
    111 
    112 ; X64-LABEL: vcall:
    113 ; X64:       movq %rdi, %[[obj:[^ ]*]]
    114 ; X64:       movq (%rdi), %[[vptr:[^ ]*]]
    115 ; X64:       movq 8(%[[vptr]]), %[[fp:[^ ]*]]
    116 ; X64:       movq %[[fp]], %r11
    117 ; X64:       callq __llvm_retpoline_r11
    118 ; X64-DAG:   movq %[[obj]], %rdi
    119 ; X64-DAG:   movq %[[fp]], %r11
    120 ; X64:       jmp __llvm_retpoline_r11 # TAILCALL
    121 
    122 ; X64FAST-LABEL: vcall:
    123 ; X64FAST:       callq __llvm_retpoline_r11
    124 ; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL
    125 
    126 ; X86-LABEL: vcall:
    127 ; X86:       movl 8(%esp), %[[obj:[^ ]*]]
    128 ; X86:       movl (%[[obj]]), %[[vptr:[^ ]*]]
    129 ; X86:       movl 4(%[[vptr]]), %[[fp:[^ ]*]]
    130 ; X86:       movl %[[fp]], %eax
    131 ; X86:       pushl %[[obj]]
    132 ; X86:       calll __llvm_retpoline_eax
    133 ; X86:       addl $4, %esp
    134 ; X86:       movl %[[fp]], %eax
    135 ; X86:       jmp __llvm_retpoline_eax # TAILCALL
    136 
    137 ; X86FAST-LABEL: vcall:
    138 ; X86FAST:       calll __llvm_retpoline_eax
    139 ; X86FAST:       jmp __llvm_retpoline_eax # TAILCALL
    140 
    141 
    142 declare void @direct_callee()
    143 
        ; Test a direct tail call from a function that has the retpoline target
        ; feature enabled (attribute group #0).
        ;
        ; @direct_callee is only declared here. The checks that follow expect a
        ; plain "jmp direct_callee" tail call under all four prefixes, with no
        ; retpoline thunk involved.
    144 define void @direct_tail() #0 {
    145   tail call void @direct_callee()
    146   ret void
    147 }
    148 
    149 ; X64-LABEL: direct_tail:
    150 ; X64:       jmp direct_callee # TAILCALL
    151 ; X64FAST-LABEL: direct_tail:
    152 ; X64FAST:   jmp direct_callee # TAILCALL
    153 ; X86-LABEL: direct_tail:
    154 ; X86:       jmp direct_callee # TAILCALL
    155 ; X86FAST-LABEL: direct_tail:
    156 ; X86FAST:   jmp direct_callee # TAILCALL
    157 
    158 
    159 declare void @nonlazybind_callee() #1
    160 
        ; Test direct calls to a callee whose declaration carries the nonlazybind
        ; attribute (attribute group #1), made from a function with the retpoline
        ; target feature enabled (attribute group #0).
        ;
        ; The body makes one plain call and one call marked `tail` to the same
        ; callee. The checks that follow expect, under the 64-bit prefixes, the
        ; callee address to be loaded through @GOTPCREL into a register and both
        ; calls to go through the r11 retpoline thunk. Under the 32-bit prefixes
        ; they expect an ordinary calll and tail-call jmp to
        ; nonlazybind_callee@PLT.
    161 define void @nonlazybind_caller() #0 {
    162   call void @nonlazybind_callee()
    163   tail call void @nonlazybind_callee()
    164   ret void
    165 }
    166 
    167 ; X64-LABEL: nonlazybind_caller:
    168 ; X64:       movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
    169 ; X64:       movq %[[REG]], %r11
    170 ; X64:       callq __llvm_retpoline_r11
    171 ; X64:       movq %[[REG]], %r11
    172 ; X64:       jmp __llvm_retpoline_r11 # TAILCALL
    173 ; X64FAST-LABEL: nonlazybind_caller:
    174 ; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
    175 ; X64FAST:   callq __llvm_retpoline_r11
    176 ; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
    177 ; X64FAST:   jmp __llvm_retpoline_r11 # TAILCALL
    178 ; X86-LABEL: nonlazybind_caller:
    179 ; X86:       calll nonlazybind_callee@PLT
    180 ; X86:       jmp nonlazybind_callee@PLT # TAILCALL
    181 ; X86FAST-LABEL: nonlazybind_caller:
    182 ; X86FAST:   calll nonlazybind_callee@PLT
    183 ; X86FAST:   jmp nonlazybind_callee@PLT # TAILCALL
    184 
    185 
    186 @indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
    187                                                    i8* blockaddress(@indirectbr_rewrite, %bb1),
    188                                                    i8* blockaddress(@indirectbr_rewrite, %bb2),
    189                                                    i8* blockaddress(@indirectbr_rewrite, %bb3),
    190                                                    i8* blockaddress(@indirectbr_rewrite, %bb4),
    191                                                    i8* blockaddress(@indirectbr_rewrite, %bb5),
    192                                                    i8* blockaddress(@indirectbr_rewrite, %bb6),
    193                                                    i8* blockaddress(@indirectbr_rewrite, %bb7),
    194                                                    i8* blockaddress(@indirectbr_rewrite, %bb8),
    195                                                    i8* blockaddress(@indirectbr_rewrite, %bb9)]
        ; The constant table above holds the addresses of blocks %bb0 through %bb9
        ; of @indirectbr_rewrite in order, so entry N is the address of %bbN.
    196 
    197 ; Check that when retpolines are enabled a function with indirectbr gets
    198 ; rewritten to use switch, and that in turn doesn't get lowered as a jump
    199 ; table.
        ;
        ; The directives placed just inside the function body forbid the suffixed
        ; jump mnemonics (jmpq for the optimized 64-bit run, jmpl for the optimized
        ; 32-bit run) anywhere after this function's label and before the next
        ; positive check of the same prefix. NOTE(review): presumably the suffixed
        ; form is how llc would print an indirect jump here - confirm.
    200 define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
    201 ; X64-LABEL: indirectbr_rewrite:
    202 ; X64-NOT:     jmpq
    203 ; X86-LABEL: indirectbr_rewrite:
    204 ; X86-NOT:     jmpl
    205 entry:
          ; The i64 loaded from %p indexes the target table directly.
    206   %i0 = load i64, i64* %p
    207   %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
    208   %target0 = load i8*, i8** %target.i0
          ; This first indirectbr lists only %bb1 and %bb3 as possible destinations.
    209   indirectbr i8* %target0, [label %bb1, label %bb3]
    210 
        ; Each %bbN block below does a volatile store of the constant N to %sink
        ; and then branches to %latch.
    211 bb0:
    212   store volatile i64 0, i64* %sink
    213   br label %latch
    214 
    215 bb1:
    216   store volatile i64 1, i64* %sink
    217   br label %latch
    218 
    219 bb2:
    220   store volatile i64 2, i64* %sink
    221   br label %latch
    222 
    223 bb3:
    224   store volatile i64 3, i64* %sink
    225   br label %latch
    226 
    227 bb4:
    228   store volatile i64 4, i64* %sink
    229   br label %latch
    230 
    231 bb5:
    232   store volatile i64 5, i64* %sink
    233   br label %latch
    234 
    235 bb6:
    236   store volatile i64 6, i64* %sink
    237   br label %latch
    238 
    239 bb7:
    240   store volatile i64 7, i64* %sink
    241   br label %latch
    242 
    243 bb8:
    244   store volatile i64 8, i64* %sink
    245   br label %latch
    246 
    247 bb9:
    248   store volatile i64 9, i64* %sink
    249   br label %latch
    250 
    251 latch:
          ; %p is loaded again and the new value selects the next table entry, so
          ; control keeps cycling through %latch; the function has no ret.
    252   %i.next = load i64, i64* %p
    253   %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
    254   %target.next = load i8*, i8** %target.i.next
    255   ; Potentially hit a full 10 successors here so that even if we rewrite as
    256   ; a switch it will try to be lowered with a jump table.
    257   indirectbr i8* %target.next, [label %bb0,
    258                                 label %bb1,
    259                                 label %bb2,
    260                                 label %bb3,
    261                                 label %bb4,
    262                                 label %bb5,
    263                                 label %bb6,
    264                                 label %bb7,
    265                                 label %bb8,
    266                                 label %bb9]
    267 }
    268 
    269 ; Lastly check that the necessary thunks were emitted.
    270 ;
    271 ; X64-LABEL:         .section        .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
    272 ; X64-NEXT:          .hidden __llvm_retpoline_r11
    273 ; X64-NEXT:          .weak   __llvm_retpoline_r11
    274 ; X64:       __llvm_retpoline_r11:
    275 ; X64-NEXT:  # {{.*}}                                # %entry
    276 ; X64-NEXT:          callq   [[CALL_TARGET:.*]]
    277 ; X64-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
    278 ; X64-NEXT:                                          # %entry
    279 ; X64-NEXT:                                          # =>This Inner Loop Header: Depth=1
    280 ; X64-NEXT:          pause
    281 ; X64-NEXT:          lfence
    282 ; X64-NEXT:          jmp     [[CAPTURE_SPEC]]
    283 ; X64-NEXT:          .p2align        4, 0x90
    284 ; X64-NEXT:  [[CALL_TARGET]]:                        # Block address taken
    285 ; X64-NEXT:                                          # %entry
    286 ; X64-NEXT:          movq    %r11, (%rsp)
    287 ; X64-NEXT:          retq
    288 ;
    289 ; X86-LABEL:         .section        .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
    290 ; X86-NEXT:          .hidden __llvm_retpoline_eax
    291 ; X86-NEXT:          .weak   __llvm_retpoline_eax
    292 ; X86:       __llvm_retpoline_eax:
    293 ; X86-NEXT:  # {{.*}}                                # %entry
    294 ; X86-NEXT:          calll   [[CALL_TARGET:.*]]
    295 ; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
    296 ; X86-NEXT:                                          # %entry
    297 ; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
    298 ; X86-NEXT:          pause
    299 ; X86-NEXT:          lfence
    300 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
    301 ; X86-NEXT:          .p2align        4, 0x90
    302 ; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
    303 ; X86-NEXT:                                          # %entry
    304 ; X86-NEXT:          movl    %eax, (%esp)
    305 ; X86-NEXT:          retl
    306 ;
    307 ; X86-LABEL:         .section        .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
    308 ; X86-NEXT:          .hidden __llvm_retpoline_ecx
    309 ; X86-NEXT:          .weak   __llvm_retpoline_ecx
    310 ; X86:       __llvm_retpoline_ecx:
    311 ; X86-NEXT:  # {{.*}}                                # %entry
    312 ; X86-NEXT:          calll   [[CALL_TARGET:.*]]
    313 ; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
    314 ; X86-NEXT:                                          # %entry
    315 ; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
    316 ; X86-NEXT:          pause
    317 ; X86-NEXT:          lfence
    318 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
    319 ; X86-NEXT:          .p2align        4, 0x90
    320 ; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
    321 ; X86-NEXT:                                          # %entry
    322 ; X86-NEXT:          movl    %ecx, (%esp)
    323 ; X86-NEXT:          retl
    324 ;
    325 ; X86-LABEL:         .section        .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
    326 ; X86-NEXT:          .hidden __llvm_retpoline_edx
    327 ; X86-NEXT:          .weak   __llvm_retpoline_edx
    328 ; X86:       __llvm_retpoline_edx:
    329 ; X86-NEXT:  # {{.*}}                                # %entry
    330 ; X86-NEXT:          calll   [[CALL_TARGET:.*]]
    331 ; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
    332 ; X86-NEXT:                                          # %entry
    333 ; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
    334 ; X86-NEXT:          pause
    335 ; X86-NEXT:          lfence
    336 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
    337 ; X86-NEXT:          .p2align        4, 0x90
    338 ; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
    339 ; X86-NEXT:                                          # %entry
    340 ; X86-NEXT:          movl    %edx, (%esp)
    341 ; X86-NEXT:          retl
    342 ;
    343 ; X86-LABEL:         .section        .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
    344 ; X86-NEXT:          .hidden __llvm_retpoline_edi
    345 ; X86-NEXT:          .weak   __llvm_retpoline_edi
    346 ; X86:       __llvm_retpoline_edi:
    347 ; X86-NEXT:  # {{.*}}                                # %entry
    348 ; X86-NEXT:          calll   [[CALL_TARGET:.*]]
    349 ; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
    350 ; X86-NEXT:                                          # %entry
    351 ; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
    352 ; X86-NEXT:          pause
    353 ; X86-NEXT:          lfence
    354 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
    355 ; X86-NEXT:          .p2align        4, 0x90
    356 ; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
    357 ; X86-NEXT:                                          # %entry
    358 ; X86-NEXT:          movl    %edi, (%esp)
    359 ; X86-NEXT:          retl
    360 
    361 
    362 attributes #0 = { "target-features"="+retpoline" }
        ; ^ Group #0 is attached to every function defined above; it adds the
        ;   retpoline feature to that function's target features.
    363 attributes #1 = { nonlazybind }
        ; ^ Group #1 is attached only to the @nonlazybind_callee declaration and
        ;   marks it nonlazybind.
    364