# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-lower-control-flow -o - %s | FileCheck %s
# Getting an undef that is specifically a VGPR is tricky from IR
#
# Every MIR function in this file hands an `undef %vgpr10` operand to an
# SI_INDIRECT_SRC_V4 / SI_INDIRECT_DST_V4 pseudo and runs only the
# si-lower-control-flow pass over it. The expectations look at the loop block
# (bb.1) produced by the pass and at the live-in list of the block after it
# (bb.2), and require the undef flag to still be present on the expanded
# instruction that reads %vgpr10.

# Extract with an undef VGPR index and a constant offset of 0.
# CHECK-LABEL: name: extract_undef_offset_vgpr{{$}}
# CHECK: bb.1:
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}

# CHECK: V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
# CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec

# CHECK: bb.2:
# CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}


--- |
  target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

  define void @extract_undef_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  entry:
    %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
    %value = extractelement <4 x i32> %ld, i32 undef
    store i32 %value, i32 addrspace(1)* %out
    ret void
  }

  define void @extract_undef_neg_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  entry:
    %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
    %value = extractelement <4 x i32> %ld, i32 undef
    store i32 %value, i32 addrspace(1)* %out
    ret void
  }

  define void @insert_undef_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  entry:
    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
    %value = insertelement <4 x i32> %ld, i32 5, i32 undef
    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
    ret void
  }

  define void @insert_undef_neg_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  entry:
    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
    %value = insertelement <4 x i32> %ld, i32 5, i32 undef
    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
    ret void
  }

  define void @insert_undef_value_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
  entry:
    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
    %value = insertelement <4 x i32> %ld, i32 undef, i32 %idx
    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
    ret void
  }

...
---
name:            extract_undef_offset_vgpr
alignment:       0
exposesReturnsTwice: false
hasInlineAsm:    false
allVRegsAllocated: true
isSSA:           false
tracksRegLiveness: true
tracksSubRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    S_WAITCNT 127
    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
    S_WAITCNT 3952
    %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
    S_WAITCNT 127
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
    S_ENDPGM

...

# Extract with an undef VGPR index and a constant offset of -7: in addition to
# the undef read, the loop is expected to add the offset into m0.
# CHECK-LABEL: name: extract_undef_neg_offset_vgpr{{$}}
# CHECK: bb.1:
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}

# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
# CHECK: %m0 = S_MOV_B32 %vcc_lo
# CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
# CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec

# CHECK: bb.2:
# CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}

name:            extract_undef_neg_offset_vgpr
alignment:       0
exposesReturnsTwice: false
hasInlineAsm:    false
allVRegsAllocated: true
isSSA:           false
tracksRegLiveness: true
tracksSubRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    S_WAITCNT 127
    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
    S_WAITCNT 3952
    %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
    S_WAITCNT 127
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
    S_ENDPGM

...

# Insert of the constant 5 (held in %vgpr4) with an undef VGPR index and a
# constant offset of 0.
# CHECK-LABEL: name: insert_undef_offset_vgpr{{$}}
# CHECK: bb.1:
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}

# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
# CHECK: %m0 = S_MOV_B32 %vcc_lo
# CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec

# CHECK: bb.2:
# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}

name:            insert_undef_offset_vgpr
alignment:       0
exposesReturnsTwice: false
hasInlineAsm:    false
allVRegsAllocated: true
isSSA:           false
tracksRegLiveness: true
tracksSubRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr4 = V_MOV_B32_e32 5, implicit %exec
    S_WAITCNT 127
    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    S_WAITCNT 3952
    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
    S_WAITCNT 127
    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
    S_ENDPGM

...

# Insert of the constant 5 (held in %vgpr4) with an undef VGPR index and a
# constant offset of -7: the loop is expected to add the offset into m0.
# CHECK-LABEL: name: insert_undef_neg_offset_vgpr{{$}}
# CHECK: bb.1:
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}

# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
# CHECK: %m0 = S_MOV_B32 %vcc_lo
# CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
# CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec

# CHECK: bb.2:
# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}

name:            insert_undef_neg_offset_vgpr
alignment:       0
exposesReturnsTwice: false
hasInlineAsm:    false
allVRegsAllocated: true
isSSA:           false
tracksRegLiveness: true
tracksSubRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr4 = V_MOV_B32_e32 5, implicit %exec
    S_WAITCNT 127
    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    S_WAITCNT 3952
    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
    S_WAITCNT 127
    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
    S_ENDPGM

...

# Insert where the inserted value (not the index) is the undef VGPR: the index
# comes from %vgpr4 and the undef flag is expected on the V_MOVRELD source.
# CHECK-LABEL: name: insert_undef_value_offset_vgpr{{$}}
# CHECK: bb.1:
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}

# CHECK: %vcc_lo = V_READFIRSTLANE_B32 %vgpr4, implicit %exec
# CHECK: %m0 = S_MOV_B32 %vcc_lo
# CHECK: %vgpr0 = V_MOVRELD_B32_e32 undef %vgpr10, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec

# CHECK: bb.2:
# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}

name:            insert_undef_value_offset_vgpr
alignment:       0
exposesReturnsTwice: false
hasInlineAsm:    false
allVRegsAllocated: true
isSSA:           false
tracksRegLiveness: true
tracksSubRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr4 = V_MOV_B32_e32 2, implicit %exec
    S_WAITCNT 127
    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    S_WAITCNT 3952
    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, killed %vgpr4, 0, undef %vgpr10, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
    S_WAITCNT 127
    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
    S_ENDPGM

...