# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s
---
# Trivial clause at beginning of program: a single flat load, expected to
# come out of the pass unchanged.
name: trivial_clause_load_flat4_x1

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_flat4_x1
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# Trivial clause at beginning of program: two flat loads whose destinations
# and address pairs are all distinct registers.
name: trivial_clause_load_flat4_x2

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_flat4_x2
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# Trivial clause at beginning of program: three flat loads whose destinations
# and address pairs are all distinct registers.
name: trivial_clause_load_flat4_x3

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_flat4_x3
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# Trivial clause at beginning of program: four flat loads whose destinations
# and address pairs are all distinct registers.
name: trivial_clause_load_flat4_x4

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_flat4_x4
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# Reuse of same input pointer is OK: both loads read $vgpr2_vgpr3 and no
# nop is expected between them.

name: trivial_clause_load_flat4_x2_sameptr
body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# 32-bit load partially clobbers its own ptr reg (writes the low half,
# $vgpr0, of the $vgpr0_vgpr1 address pair).
name: flat_load4_overwrite_ptr_lo

body: |
  bb.0:
    ; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# 32-bit load partially clobbers its own ptr reg (writes the high half,
# $vgpr1, of the $vgpr0_vgpr1 address pair).
name: flat_load4_overwrite_ptr_hi

body: |
  bb.0:
    ; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
    ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# 64-bit load clobbers its own ptr reg (destination and address are both
# $vgpr2_vgpr3).
name: flat_load8_overwrite_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: flat_load8_overwrite_ptr
    ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# vmcnt has 4 bits, so at most 16 loads can be outstanding. The waitcnt
# breaks the clause.
#
# Sixteen loads from $vgpr0_vgpr1 are followed by a load that writes $vgpr0;
# the XNACK checks expect an S_NOP in front of that last load.


name: break_clause_at_max_clause_size_flat_load4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
    ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
    ; GCN-NEXT: S_ENDPGM

    $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr

    $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr

    $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr

    $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
    S_ENDPGM
...
---
# The second load writes $vgpr2, the low half of the first load's address
# pair; the XNACK checks expect an S_NOP between the two loads.

name: break_clause_simple_load_flat4_lo_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# The second load writes $vgpr3, the high half of the first load's address
# pair; the XNACK checks expect an S_NOP between the two loads.

name: break_clause_simple_load_flat4_hi_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# The second 64-bit load writes $vgpr2_vgpr3, the whole address pair of the
# first load; the XNACK checks expect an S_NOP between the two loads.

name: break_clause_simple_load_flat8_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# The 128-bit load's destination $vgpr2_vgpr3_vgpr4_vgpr5 covers the address
# pair of the first load; the XNACK checks expect an S_NOP between them.


name: break_clause_simple_load_flat16_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM
    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---

# The clause is broken by the waitcnt inserted at the end of the
# block, so no nop is needed.
# NOTE(review): the checks for this test still expect an S_NOP at the top of
# bb.1 under the XNACK prefix - confirm whether the comment or the checks
# describe the intended behavior.


name: break_clause_block_boundary_load_flat8_ptr

body: |
  ; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x80000000)
  ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN: bb.1:
  ; XNACK-NEXT: S_NOP 0
  ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN-NEXT: S_ENDPGM

  bb.0:
    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr

  bb.1:
    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# The load clobbers the pointer of the store, so it needs to break.
# NOTE(review): the checks below expect the store and load back to back with
# no S_NOP under either prefix - confirm this is the intended expectation.

name: break_clause_store_load_into_ptr_flat4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# The load clobbers the data of the store, so it needs to break.
# FIXME: Would it be better to s_nop and wait later?

name: break_clause_store_load_into_data_flat4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_store_load_into_data_flat4
    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# Regular VALU instruction breaks clause, no nop needed.
# The V_MOV sits between the two flat loads, so even though the second load
# writes $vgpr2 (part of the first load's address pair) neither check prefix
# expects an S_NOP.

name: valu_inst_breaks_clause

body: |
  bb.0:
    ; GCN-LABEL: name: valu_inst_breaks_clause
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# Regular SALU instruction breaks clause, no nop needed.
# The S_MOV sits between the two flat loads, so neither check prefix expects
# an S_NOP before the second load.

name: salu_inst_breaks_clause

body: |
  bb.0:
    ; GCN-LABEL: name: salu_inst_breaks_clause
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $sgpr8 = S_MOV_B32 0
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# A DS instruction sits between the two flat loads. As in the VALU, SALU and
# SMRD variants of this test, no S_NOP is expected before the second load
# under either check prefix.

name: ds_inst_breaks_clause

body: |
  bb.0:
    ; GCN-LABEL: name: ds_inst_breaks_clause
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# A scalar memory load sits between the two flat loads; no S_NOP is expected
# before the second load under either check prefix.

name: smrd_inst_breaks_clause

body: |
  bb.0:
    ; GCN-LABEL: name: smrd_inst_breaks_clause
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# FIXME: Should this be handled?
# The first load carries an extra implicit use of $vgpr4_vgpr5, which the
# second load then writes; the XNACK checks expect an S_NOP between them.
name: implicit_use_breaks_clause

body: |
  bb.0:
    ; GCN-LABEL: name: implicit_use_breaks_clause
    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
    $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# Two buffer loads with distinct destination and offset registers; no nop is
# expected between them.
name: trivial_clause_load_mubuf4_x2

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM

    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    S_ENDPGM
...
---
# The second buffer load writes $vgpr2, the offset register read by the
# first; the XNACK checks expect an S_NOP between them.
name: break_clause_simple_load_mubuf_offen_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM

    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    S_ENDPGM
...
---
# BUFFER instructions overwriting their own inputs is supposedly OK.
# Here the load uses $vgpr0 as both its destination and its offset register.

name: mubuf_load4_overwrite_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: mubuf_load4_overwrite_ptr
    ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
    ; GCN-NEXT: S_ENDPGM
    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
    S_ENDPGM
...
---
# Break a clause from interference between mubuf and flat instructions:
# the buffer load writes $vgpr2, part of the flat load's address pair.

name: break_clause_flat_load_mubuf_load

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_flat_load_mubuf_load
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    S_ENDPGM
...
# Break a clause from interference between mubuf and flat instructions:
# the flat load writes $vgpr1, the offset register read by the buffer load.

# GCN-LABEL: name: break_clause_mubuf_load_flat_load
# GCN: bb.0:
# GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
# XNACK-NEXT: S_NOP 0
# GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3
# GCN-NEXT: S_ENDPGM
name: break_clause_mubuf_load_flat_load

body: |
  bb.0:
    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr

    S_ENDPGM
...
---
# A returning flat atomic writes $vgpr4, part of the preceding load's address
# pair; the XNACK checks expect an S_NOP between them.

name: break_clause_atomic_rtn_into_ptr_flat4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
    ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# A non-returning flat atomic followed by a load that writes the atomic's
# data register $vgpr2; the checks expect no nop under either prefix.
name: break_clause_atomic_nortn_ptr_load_flat4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
    ; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# A returning buffer atomic writes $vgpr2, the offset register read by the
# preceding buffer load; the XNACK checks expect an S_NOP between them.

name: break_clause_atomic_rtn_into_ptr_mubuf4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM

    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
    S_ENDPGM
...
---
# A non-returning buffer atomic followed by a load that writes $vgpr1, one of
# the atomic's VGPR operands; the checks expect no nop under either prefix.

name: break_clause_atomic_nortn_ptr_load_mubuf4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
    ; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM

    BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    S_ENDPGM
...
---
# Make sure there is no assert on mubuf instructions which do not have
# vaddr, and don't add register to track.
name: no_break_clause_mubuf_load_novaddr

body: |
  bb.0:
    ; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM
    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    S_ENDPGM
...
---
# Loads and stores using different addresses theoretically do not
# need a nop. The XNACK checks nevertheless expect an S_NOP in front of the
# second store, which follows a load.
name: mix_load_store_clause
body: |
  bb.0:
    ; GCN-LABEL: name: mix_load_store_clause
    ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# Loads and stores using the same address need a nop.

# Both stores use the address pair $vgpr0_vgpr1; the XNACK checks expect an
# S_NOP in front of the second store, which follows a load.
name: mix_load_store_clause_same_address
body: |
  bb.0:
    ; GCN-LABEL: name: mix_load_store_clause_same_address
    ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM

    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...