# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s

# Each function below is run through the post-RA-hazard-rec pass only.
# Check lines using the shared GCN prefix apply to both invocations; check
# lines using the XNACK prefix apply to the carrizo invocation only, so an
# expected S_NOP under that prefix must be absent in the fiji output (the
# following GCN "next" check would otherwise fail there).

---
# Trivial clause at beginning of program
# A single scalar load followed by S_ENDPGM. Both invocations expect the
# output to equal the input with nothing inserted.
name: trivial_smem_clause_load_smrd4_x1

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    S_ENDPGM
...
---
# Trivial clause at beginning of program
# Two consecutive scalar loads from different pointer pairs into different
# destinations. Neither destination overlaps a pointer pair; both
# invocations expect no inserted instruction.
name: trivial_smem_clause_load_smrd4_x2

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    S_ENDPGM
...
---
# Trivial clause at beginning of program
# Three consecutive scalar loads, each from a different pointer pair into a
# different destination. Both invocations expect no inserted instruction.
name: trivial_smem_clause_load_smrd4_x3

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
    S_ENDPGM
...
---
# Trivial clause at beginning of program
# Four consecutive scalar loads, each from a different pointer pair into a
# different destination. Both invocations expect no inserted instruction.
name: trivial_smem_clause_load_smrd4_x4

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
    ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
    $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
    S_ENDPGM
...
---
# Reuse of same input pointer is OK
# Both loads read $sgpr10_sgpr11 and write registers outside that pair;
# both invocations expect no inserted instruction.
name: trivial_smem_clause_load_smrd4_x2_sameptr
body: |
  bb.0:
    ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
    ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    S_ENDPGM
...
---
# 32-bit load partially clobbers its own ptr reg
# A lone load whose destination is $sgpr10, the first register of its own
# pointer pair. Both invocations expect no inserted instruction.
name: smrd_load4_overwrite_ptr_lo

body: |
  bb.0:
    ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
    ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    S_ENDPGM
...
---
# 32-bit load partially clobbers its own ptr reg
# A lone load whose destination is $sgpr11, the second register of its own
# pointer pair. Both invocations expect no inserted instruction.
name: smrd_load4_overwrite_ptr_hi

body: |
  bb.0:
    ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
    ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    S_ENDPGM
...
---
# 64-bit load clobbers its own ptr reg
# A lone X2 load whose destination pair is the same as its pointer pair.
# Both invocations expect no inserted instruction.
name: smrd_load8_overwrite_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: smrd_load8_overwrite_ptr
    ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    S_ENDPGM
...
---
# vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
# breaks the clause.
#
# The input is 16 loads from $sgpr10_sgpr11 into $sgpr13..$sgpr28, a 17th
# load from $sgpr30_sgpr31 into $sgpr0, and an S_MOV_B32 that implicitly
# uses all 16 earlier results. Both invocations expect the instruction
# stream to be unchanged.
# NOTE(review): no waitcnt appears in the input or in the expected output,
# and these are scalar loads, so the counter meant above is presumably
# lgkmcnt rather than vmcnt - confirm and reword the comment above.

name: break_smem_clause_at_max_smem_clause_size_smrd_load4

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
    ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
    ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
    ; GCN-NEXT: S_ENDPGM
    $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0

    $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0

    $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0

    $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0

    $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
    $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
    S_ENDPGM
...
---
# Two consecutive loads, each writing the first register of its own pointer
# pair ($sgpr10 from $sgpr10_sgpr11, then $sgpr12 from $sgpr12_sgpr13).
# The carrizo invocation expects an S_NOP 0 between the two loads; the fiji
# invocation expects them back to back.

name: break_smem_clause_simple_load_smrd4_lo_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
    ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    S_ENDPGM
...
---
# Two consecutive loads into $sgpr0 and $sgpr3 from $sgpr10_sgpr11 and
# $sgpr12_sgpr13. Both invocations expect no inserted instruction.
# NOTE(review): despite the "break ... hi_ptr" name, neither destination
# overlaps either pointer pair as written - confirm this is the intended
# input.

name: break_smem_clause_simple_load_smrd4_hi_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    S_ENDPGM
...
---
# The second X2 load overwrites $sgpr10_sgpr11, the pointer pair read by the
# first load. The carrizo invocation expects an S_NOP 0 between the two
# loads; the fiji invocation expects them back to back.

name: break_smem_clause_simple_load_smrd8_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
    S_ENDPGM
...
---
# An X2 load from $sgpr10_sgpr11 followed by an X4 load into
# $sgpr12..$sgpr15 from $sgpr6_sgpr7. Both invocations expect no inserted
# instruction.
# NOTE(review): despite the "break" in the name, the X4 destination does
# not overlap the first load's pointer pair as written - confirm intent.

name: break_smem_clause_simple_load_smrd16_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
    $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
    S_ENDPGM
...
---
# Same pair of loads as break_smem_clause_simple_load_smrd8_ptr, but the
# first load is the last instruction of bb.0 and the second is the first
# instruction of bb.1. The carrizo invocation expects an S_NOP 0 at the top
# of bb.1, before the second load; the fiji invocation expects none.

name: break_smem_clause_block_boundary_load_smrd8_ptr

body: |
  ; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
  ; GCN: bb.1:
  ; XNACK-NEXT: S_NOP 0
  ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
  ; GCN-NEXT: S_ENDPGM
  bb.0:
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0

  bb.1:
    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
    S_ENDPGM
...
---
# The load clobbers the pointer of the store, so it needs to break.
#
# NOTE(review): as written, the load writes $sgpr12 and reads
# $sgpr14_sgpr15, while the store uses $sgpr16 and $sgpr10_sgpr11, so no
# register is shared, and both invocations expect no inserted instruction.
# The sentence above looks stale relative to the checks - confirm.

name: break_smem_clause_store_load_into_ptr_smrd4

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
    ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
    ; GCN-NEXT: S_ENDPGM
    S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
    $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
    S_ENDPGM
...
---
# The load clobbers the data of the store, so it needs to break.
# FIXME: Would it be better to s_nop and wait later?
#
# The load writes $sgpr8, which is the data operand of the preceding store.
# NOTE(review): the checks currently expect no inserted instruction on
# either invocation, i.e. no break is actually verified here.

name: break_smem_clause_store_load_into_data_smrd4

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
    ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: S_ENDPGM
    S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
    $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    S_ENDPGM
...
---
# Regular VALU instruction breaks clause, no nop needed
# A V_MOV_B32_e32 sits between two scalar loads; both invocations expect
# the three instructions unchanged and in order.
name: valu_inst_breaks_smem_clause

body: |
  bb.0:
    ; GCN-LABEL: name: valu_inst_breaks_smem_clause
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    S_ENDPGM
...
---
# Regular SALU instruction breaks clause, no nop needed
# An S_MOV_B32 sits between two scalar loads; both invocations expect the
# three instructions unchanged and in order.
name: salu_inst_breaks_smem_clause

body: |
  bb.0:
    ; GCN-LABEL: name: salu_inst_breaks_smem_clause
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $sgpr8 = S_MOV_B32 0
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    S_ENDPGM
...
---
# A DS_READ_B32 sits between two scalar loads; both invocations expect the
# three instructions unchanged and in order, with no nop inserted.
name: ds_inst_breaks_smem_clause

body: |
  bb.0:
    ; GCN-LABEL: name: ds_inst_breaks_smem_clause
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    S_ENDPGM
...
---
# A FLAT_LOAD_DWORD sits between two scalar loads; both invocations expect
# the three instructions unchanged and in order, with no nop inserted.

name: flat_inst_breaks_smem_clause

body: |
  bb.0:
    ; GCN-LABEL: name: flat_inst_breaks_smem_clause
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
    S_ENDPGM
...
---
# FIXME: Should this be handled?
# The first load carries an extra implicit use of $sgpr12_sgpr13, and the
# second load writes $sgpr12_sgpr13. The carrizo invocation expects an
# S_NOP 0 between the two loads; the fiji invocation expects none.
name: implicit_use_breaks_smem_clause

body: |
  bb.0:
    ; GCN-LABEL: name: implicit_use_breaks_smem_clause
    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
    ; GCN-NEXT: S_ENDPGM
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
    $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
    S_ENDPGM
...