Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
      3 ; REQUIRES: asserts
      4 
      5 target triple = "x86_64-pc-linux-gnu"
      6 
      7 ; Can we lower a single vector?
        ; %obj, a two-element vector of addrspace(1) pointers, is the only pointer
        ; operand passed to the statepoint (operand index 7) and is relocated as its
        ; own base.  The expected output spills %xmm0 to (%rsp) before the call to
        ; do_safepoint and reloads it from the same address afterwards.
      8 define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
      9 ; CHECK-LABEL: test:
     10 ; CHECK:       # %bb.0: # %entry
     11 ; CHECK-NEXT:    subq $24, %rsp
     12 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
     13 ; CHECK-NEXT:    movaps %xmm0, (%rsp)
     14 ; CHECK-NEXT:    callq do_safepoint
     15 ; CHECK-NEXT:  .Ltmp0:
     16 ; CHECK-NEXT:    movaps (%rsp), %xmm0
     17 ; CHECK-NEXT:    addq $24, %rsp
     18 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
     19 ; CHECK-NEXT:    retq
     20 entry:
     21   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
     22   %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (base, derived) = (%obj, %obj), both statepoint operand 7
     23   ret <2 x i8 addrspace(1)*> %obj.relocated
     24 }
     25 
     26 ; Can we lower the base, derived pairs if both are vectors?
        ; %derived is %obj offset by the scalar %offset; the expected output
        ; broadcasts %rdi into %xmm1 and adds %xmm0 to form it.  Both vectors are
        ; passed to the statepoint (%obj at operand index 7, %derived at index 8) and
        ; stored to separate stack locations before the call.  Only the relocated
        ; %derived is reloaded, from (%rsp), and returned.
     27 define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" {
     28 ; CHECK-LABEL: test2:
     29 ; CHECK:       # %bb.0: # %entry
     30 ; CHECK-NEXT:    subq $40, %rsp
     31 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
     32 ; CHECK-NEXT:    movq %rdi, %xmm1
     33 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
     34 ; CHECK-NEXT:    paddq %xmm0, %xmm1
     35 ; CHECK-NEXT:    movdqa %xmm0, {{[0-9]+}}(%rsp)
     36 ; CHECK-NEXT:    movdqa %xmm1, (%rsp)
     37 ; CHECK-NEXT:    callq do_safepoint
     38 ; CHECK-NEXT:  .Ltmp1:
     39 ; CHECK-NEXT:    movaps (%rsp), %xmm0
     40 ; CHECK-NEXT:    addq $40, %rsp
     41 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
     42 ; CHECK-NEXT:    retq
     43 entry:
     44   %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset
     45   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived)
     46   %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (base, derived) = (%obj, %derived), statepoint operands 7 and 8
     47   ret <2 x i8 addrspace(1)*> %derived.relocated
     48 }
     49 
     50 ; Originally, this was just a variant of @test2 above, but it ends up
     51 ; covering a bunch of interesting missed optimizations.  Specifically:
     52 ; - We waste a stack slot for a value that a backend transform pass
     53 ;   CSEd to another spilled one.
     54 ; - We don't remove the testb even though it serves no purpose
     55 ; - We could in principle reuse the argument memory (%rsi) and do away
     56 ;   with stack slots entirely.
        ; Both branches load the same vector from %ptr, and the two phis in %merge
        ; have identical incoming values, so %obj and %obj.base always hold the same
        ; value.  The statepoint receives %obj at operand index 7 and %obj.base at
        ; index 8.  Only %obj.relocated.casted is returned;
        ; %obj.base.relocated.casted has no users in this function.
     57 define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
     58 ; CHECK-LABEL: test3:
     59 ; CHECK:       # %bb.0: # %entry
     60 ; CHECK-NEXT:    subq $40, %rsp
     61 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
     62 ; CHECK-NEXT:    testb $1, %dil
     63 ; CHECK-NEXT:    movaps (%rsi), %xmm0
     64 ; CHECK-NEXT:    movaps %xmm0, (%rsp)
     65 ; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
     66 ; CHECK-NEXT:    callq do_safepoint
     67 ; CHECK-NEXT:  .Ltmp2:
     68 ; CHECK-NEXT:    movaps (%rsp), %xmm0
     69 ; CHECK-NEXT:    addq $40, %rsp
     70 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
     71 ; CHECK-NEXT:    retq
     72 entry:
     73   br i1 %cnd, label %taken, label %untaken
     74 
     75 taken:                                            ; preds = %entry
     76   %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
     77   br label %merge
     78 
     79 untaken:                                          ; preds = %entry
     80   %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
     81   br label %merge
     82 
     83 merge:                                            ; preds = %untaken, %taken
     84   %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
     85   %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
     86   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base)
     87   %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (base, derived) = (%obj.base, %obj), statepoint operands 8 and 7
     88   %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*>
     89   %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (base, derived) = (%obj.base, %obj.base), both statepoint operand 8
     90   %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*>
     91   ret <2 x i64 addrspace(1)*> %obj.relocated.casted
     92 }
     93 
     94 ; Can we handle vector constants?  At the moment, we don't appear to actually
     95 ; get selection dag nodes for these.
        ; The function takes no arguments; the value passed to the statepoint at
        ; operand index 7 is the constant zeroinitializer vector.  The expected
        ; output materializes it with xorps, stores it to (%rsp) before the call and
        ; reloads it from there afterwards.
     96 define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" {
     97 ; CHECK-LABEL: test4:
     98 ; CHECK:       # %bb.0: # %entry
     99 ; CHECK-NEXT:    subq $24, %rsp
    100 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
    101 ; CHECK-NEXT:    xorps %xmm0, %xmm0
    102 ; CHECK-NEXT:    movaps %xmm0, (%rsp)
    103 ; CHECK-NEXT:    callq do_safepoint
    104 ; CHECK-NEXT:  .Ltmp3:
    105 ; CHECK-NEXT:    movaps (%rsp), %xmm0
    106 ; CHECK-NEXT:    addq $24, %rsp
    107 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
    108 ; CHECK-NEXT:    retq
    109 entry:
    110   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer)
    111   %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (base, derived) = statepoint operand 7 twice, i.e. the zeroinitializer vector
    112   ret <2 x i8 addrspace(1)*> %obj.relocated
    113 }
    114 
    115 ; Check that we can lower a constant typed as i128 correctly.  Note that the
    116 ; actual value is representable in 64 bits.  We don't have a representation
    117 ; of larger than 64 bit constant in the StackMap format.
        ; No gc.relocate follows this statepoint and the function returns void; the
        ; expected output contains no spill stores, only pushq/popq %rax around the
        ; call.  The trailing statepoint operands `i32 0, i32 1, i128 0` are
        ; presumably a transition-argument count of 0, a deopt-argument count of 1
        ; and the single deopt value i128 0 -- TODO confirm against the gc.statepoint
        ; operand layout for the LLVM version in use.
    118 define void @test5() gc "statepoint-example" {
    119 ; CHECK-LABEL: test5:
    120 ; CHECK:       # %bb.0: # %entry
    121 ; CHECK-NEXT:    pushq %rax
    122 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
    123 ; CHECK-NEXT:    callq do_safepoint
    124 ; CHECK-NEXT:  .Ltmp4:
    125 ; CHECK-NEXT:    popq %rax
    126 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
    127 ; CHECK-NEXT:    retq
    128 entry:
    129   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0)
    130   ret void
    131 }
    132 
    133 ; CHECK: __LLVM_StackMaps:
    134 
    135 ; CHECK: .Ltmp0-test
    136 ; Check for the two location records; both are Indirect 7+0, i.e. they refer to the same spill slot
    137 ; Stack Maps: 		Loc 3: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
    138 ; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
    139 ; CHECK: .byte	3
    140 ; CHECK: .byte	0
    141 ; CHECK: .short 16
    142 ; CHECK: .short	7
    143 ; CHECK: .short	0
    144 ; CHECK: .long	0
    145 ; CHECK: .byte	3
    146 ; CHECK: .byte	0
    147 ; CHECK: .short 16
    148 ; CHECK: .short	7
    149 ; CHECK: .short	0
    150 ; CHECK: .long	0
    151 
    152 ; CHECK: .Ltmp1-test2
    153 ; Check for the two spill slots
    154 ; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
    155 ; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
    156 ; CHECK: .byte	3
    157 ; CHECK: .byte	0
    158 ; CHECK: .short 16
    159 ; CHECK: .short	7
    160 ; CHECK: .short	0
    161 ; CHECK: .long	16
    162 ; CHECK: .byte	3
    163 ; CHECK: .byte	0
    164 ; CHECK: .short 16
    165 ; CHECK: .short	7
    166 ; CHECK: .short	0
    167 ; CHECK: .long	0
    168 
    169 ; CHECK: .Ltmp2-test3
    170 ; Check for the four location records; they reference only two spill slots (offsets 16 and 0)
    171 ; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
    172 ; Stack Maps: 		Loc 4: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
    173 ; Stack Maps: 		Loc 5: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
    174 ; Stack Maps: 		Loc 6: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
    175 ; CHECK: .byte	3
    176 ; CHECK: .byte	0
    177 ; CHECK: .short 16
    178 ; CHECK: .short	7
    179 ; CHECK: .short	0
    180 ; CHECK: .long	16
    181 ; CHECK: .byte	3
    182 ; CHECK: .byte	 0
    183 ; CHECK: .short 16
    184 ; CHECK: .short	7
    185 ; CHECK: .short	0
    186 ; CHECK: .long	16
    187 ; CHECK: .byte	3
    188 ; CHECK: .byte	 0
    189 ; CHECK: .short 16
    190 ; CHECK: .short	7
    191 ; CHECK: .short	0
    192 ; CHECK: .long	16
    193 ; CHECK: .byte	3
    194 ; CHECK: .byte	 0
    195 ; CHECK: .short 16
    196 ; CHECK: .short	7
    197 ; CHECK: .short	0
    198 ; CHECK: .long	0
    199 
    200 declare void @do_safepoint() ; call target passed to every statepoint in the functions above
    201 
    202 declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
    203 declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) ; scalar-pointer variant; no call to it appears in the functions above
    204 declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32) ; two-element vector variant, called by @test, @test2, @test3 and @test4
    205