; RUN: llc -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
; REQUIRES: asserts

; Codegen test for gc.statepoint / gc.relocate when the GC pointers are
; <2 x ptr addrspace(1)> vectors.  Each function checks the spill/reload
; sequence around the call to @do_safepoint; the directives at the bottom of
; the file check the location records emitted into the __LLVM_StackMaps
; section.
;
; Operand numbering used by the gc.relocate calls below: the two i32 operands
; are (base, derived) positions in the statepoint's operand list, counted from
; zero.  With the seven leading non-pointer operands used throughout this file,
; the first GC pointer operand is index 7 and the second is index 8.

target triple = "x86_64-pc-linux-gnu"

; Can we lower a single vector?
define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
entry:
; CHECK-LABEL: @test
; CHECK: subq $24, %rsp
; CHECK: movaps %xmm0, (%rsp)
; CHECK: callq do_safepoint
; CHECK: movaps (%rsp), %xmm0
; CHECK: addq $24, %rsp
  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
  ret <2 x i8 addrspace(1)*> %obj.relocated
}

; Can we lower the base, derived pairs if both are vectors?
define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" {
entry:
; CHECK-LABEL: @test2
; CHECK: subq $40, %rsp
; CHECK: movd %rdi, %xmm1
; CHECK: pshufd $68, %xmm1, %xmm1 # xmm1 = xmm1[0,1,0,1]
; CHECK: paddq %xmm0, %xmm1
; CHECK: movdqa %xmm0, 16(%rsp)
; CHECK: movdqa %xmm1, (%rsp)
; CHECK: callq do_safepoint
; CHECK: movaps (%rsp), %xmm0
; CHECK: addq $40, %rsp
  ; %derived is a vector GEP off %obj: the scalar %offset is applied to both
  ; lanes (the movd/pshufd/paddq sequence expected above).
  %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset
  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived)
  %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (%obj, %derived)
  ret <2 x i8 addrspace(1)*> %derived.relocated
}

; Originally, this was just a variant of @test2 above, but it ends up
; covering a bunch of interesting missed optimizations.  Specifically:
; - We waste a stack slot for a value that a backend transform pass
;   CSEd to another spilled one.
; - We don't remove the testb even though it serves no purpose.
; - We could in principle reuse the argument memory (%rsi) and do away
;   with stack slots entirely.
define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
entry:
; CHECK-LABEL: @test3
; CHECK: subq $40, %rsp
; CHECK: testb $1, %dil
; CHECK: movaps (%rsi), %xmm0
; CHECK: movaps %xmm0, 16(%rsp)
; CHECK: movaps %xmm0, (%rsp)
; CHECK: callq do_safepoint
; CHECK: movaps (%rsp), %xmm0
; CHECK: addq $40, %rsp
  br i1 %cnd, label %taken, label %untaken

taken:                                            ; preds = %entry
  %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
  br label %merge

untaken:                                          ; preds = %entry
  ; Same load as in %taken; both arms read the same address.
  %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
  br label %merge

merge:                                            ; preds = %untaken, %taken
  ; Two phis with identical incoming values; they are passed to the
  ; statepoint as separate operands (%obj at index 7, %obj.base at index 8).
  %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
  %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base)
  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (%obj.base, %obj)
  %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*>
  ; The relocated base is computed but not used by the return below.
  %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (%obj.base, %obj.base)
  %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*>
  ret <2 x i64 addrspace(1)*> %obj.relocated.casted
}

; Can we handle vector constants?  At the moment, we don't appear to actually
; get selection dag nodes for these.
define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" {
entry:
; CHECK-LABEL: @test4
; CHECK: subq $24, %rsp
; CHECK: xorps %xmm0, %xmm0
; CHECK: movaps %xmm0, (%rsp)
; CHECK: callq do_safepoint
; CHECK: movaps (%rsp), %xmm0
; CHECK: addq $24, %rsp
  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer)
  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; base and derived are both the zeroinitializer operand
  ret <2 x i8 addrspace(1)*> %obj.relocated
}

; Check that we can lower a constant typed as i128 correctly.  Note that the
; actual value is representable in 64 bits.  We don't have a representation
; of larger than 64 bit constant in the StackMap format.
define void @test5() gc "statepoint-example" {
entry:
; CHECK-LABEL: @test5
; CHECK: push
; CHECK: callq do_safepoint
; CHECK: pop
  ; NOTE(review): the trailing "i32 1, i128 0" looks like a count of one
  ; followed by the single i128 constant operand -- confirm against the
  ; statepoint operand layout.
  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0)
  ret void
}

; Stack map section checks.  Each recorded location below is matched as four
; directives: kind (3 = Indirect), a byte that is 16 in every record here
; (presumably the size in bytes of the spilled vector), the register number
; (7), and the offset from that register.

; CHECK: __LLVM_StackMaps:

; CHECK: .Ltmp1-test
; Check for the two recorded locations; both refer to offset 0.
; Stack Maps: Loc 3: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0]
; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0]
; CHECK: .byte 3
; CHECK: .byte 16
; CHECK: .short 7
; CHECK: .long 0
; CHECK: .byte 3
; CHECK: .byte 16
; CHECK: .short 7
; CHECK: .long 0

; CHECK: .Ltmp3-test2
; Check for the two spill slots (offsets 16 and 0).
; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16]
; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0]
; CHECK: .byte 3
; CHECK: .byte 16
; CHECK: .short 7
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 16
; CHECK: .short 7
; CHECK: .long 0

; CHECK: .Ltmp5-test3
; Check for the four recorded locations (three at offset 16, one at offset 0).
; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16]
; Stack Maps: Loc 4: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16]
; Stack Maps: Loc 5: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16]
; Stack Maps: Loc 6: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0]
; CHECK: .byte 3
; CHECK: .byte 16
; CHECK: .short 7
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 16
; CHECK: .short 7
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 16
; CHECK: .short 7
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 16
; CHECK: .short 7
; CHECK: .long 0

; Callee used as the statepoint target in every test above.
declare void @do_safepoint()

declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
; The scalar relocate overload is declared but not called in this file.
declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)