; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VMEM %s

; ALL-LABEL: {{^}}spill_sgpr_x2:
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Reload

; SMEM: s_dcache_wb
; SMEM: s_endpgm

; FIXME: Should only need 4 bytes
; SMEM: ScratchSize: 12


; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1

; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x2(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; ALL-LABEL: {{^}}spill_sgpr_x4:
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Reload
; SMEM: s_dcache_wb
; SMEM: s_endpgm

; FIXME: Should only need 4 bytes
; SMEM: ScratchSize: 20

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3


; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x4(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; ALL-LABEL: {{^}}spill_sgpr_x8:

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload

; SMEM: s_dcache_wb
; SMEM: s_endpgm

; SMEM: ScratchSize: 36

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7

; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x8(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; FIXME: x16 inlineasm seems broken
; define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
;   %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
;   %cmp = icmp eq i32 %in, 0
;   br i1 %cmp, label %bb0, label %ret

; bb0:
;   call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
;   br label %ret

; ret:
;   ret void
; }

attributes #0 = { nounwind }