; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
; REQUIRES: asserts

; Exercises lowering of gc.statepoint / gc.relocate when the GC values are
; vectors of addrspace(1) pointers, and checks both the emitted x86-64
; assembly and the location records in the stack map section.

target triple = "x86_64-pc-linux-gnu"

; Can we lower a single vector?
; %obj is the only trailing operand of the statepoint call (operand index 7,
; counting from 0) and is relocated with itself as both base and derived
; pointer. The expected code stores %xmm0 to (%rsp) before the call and
; reloads it from the same slot afterwards.
define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movaps %xmm0, (%rsp)
; CHECK-NEXT: callq do_safepoint
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: movaps (%rsp), %xmm0
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
  ret <2 x i8 addrspace(1)*> %obj.relocated
}

; Can we lower the base, derived pairs if both are vectors?
; %derived is a vector getelementptr off %obj by the scalar %offset. Both
; vectors are passed to the statepoint (operand indices 7 and 8); only the
; derived vector is relocated and returned. The expected code spills both
; vectors but reloads only the one stored at (%rsp).
define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movq %rdi, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; CHECK-NEXT: paddq %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movdqa %xmm1, (%rsp)
; CHECK-NEXT: callq do_safepoint
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: movaps (%rsp), %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset
  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived)
  %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (%obj, %derived)
  ret <2 x i8 addrspace(1)*> %derived.relocated
}

; Originally, this was just a variant of @test2 above, but it ends up
; covering a bunch of interesting missed optimizations. Specifically:
; - We waste a stack slot for a value that a backend transform pass
;   CSEd to another spilled one.
; - We don't remove the testb even though it serves no purpose.
; - We could in principle reuse the argument memory (%rsi) and do away
;   with stack slots entirely.
; Both branch arms load from the same address %ptr, so the two phis in %merge
; (%obj.base and %obj) merge identical pairs of values. In the statepoint
; call %obj is operand index 7 and %obj.base is operand index 8. Two relocates
; are emitted, but only the first one (%obj.relocated.casted) is returned;
; %obj.base.relocated.casted has no users in this function.
define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: movaps (%rsi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp)
; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: callq do_safepoint
; CHECK-NEXT: .Ltmp2:
; CHECK-NEXT: movaps (%rsp), %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  br i1 %cnd, label %taken, label %untaken

taken: ; preds = %entry
  %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
  br label %merge

untaken: ; preds = %entry
  %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
  br label %merge

merge: ; preds = %untaken, %taken
  %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
  %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base)
  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (%obj.base, %obj)
  %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*>
  %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (%obj.base, %obj.base)
  %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*>
  ret <2 x i64 addrspace(1)*> %obj.relocated.casted
}

; Can we handle vector constants?
; At the moment, we don't appear to actually
; get selection dag nodes for these.
; The only trailing statepoint operand is the constant vector zeroinitializer
; (operand index 7); it is relocated with itself as base and derived. The
; expected code materializes zero with xorps, spills it, and reloads it after
; the call.
define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp)
; CHECK-NEXT: callq do_safepoint
; CHECK-NEXT: .Ltmp3:
; CHECK-NEXT: movaps (%rsp), %xmm0
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer)
  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (zeroinitializer, zeroinitializer) - operand 7 is the constant vector
  ret <2 x i8 addrspace(1)*> %obj.relocated
}

; Check that we can lower a constant typed as i128 correctly. Note that the
; actual value is representable in 64 bits. We don't have a representation
; of larger than 64 bit constant in the StackMap format.
; The i128 0 operand is preceded by a count of 1 - presumably the
; deopt-argument count; confirm against the statepoint intrinsic operand
; layout. No gc.relocate is used here, and the expected code contains no
; spill or reload around the call.
define void @test5() gc "statepoint-example" {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq do_safepoint
; CHECK-NEXT: .Ltmp4:
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0)
  ret void
}

; Stack map section. Each group below is anchored on the label emitted right
; after the corresponding call (.Ltmp0 for @test, .Ltmp1 for @test2, .Ltmp2
; for @test3). The "Stack Maps: Loc N" lines are plain comments recording the
; expected location records and are not matched by FileCheck. Each record is
; matched as six directives; per the recorded encodings, .byte 3 is the
; Indirect kind, .short 16 the size, .short 7 the register number (presumably
; the DWARF number of %rsp, matching the %rsp-relative spills above - confirm)
; and the final .long the offset.
; CHECK: __LLVM_StackMaps:

; CHECK: .Ltmp0-test
; Check for the two spill slot records (both refer to offset 0 here)
; Stack Maps: Loc 3: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 0
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 0

; CHECK: .Ltmp1-test2
; Check for the two spill slot records (offset 16, then offset 0)
; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 0

; CHECK: .Ltmp2-test3
; Check for the four spill slot records (three at offset 16, one at offset 0)
; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
; Stack Maps: Loc 4: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
; Stack Maps: Loc 5: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
; Stack Maps: Loc 6: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 0

; Callee used as the statepoint target in every test above.
declare void @do_safepoint()

declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
; NOTE(review): the scalar relocate below is not referenced by any function
; visible in this file; only the v2p1i8 variant is called.
declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)