Home | History | Annotate | Download | only in AMDGPU
      1 # RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
      2 # RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s
      3 ---
      4 # Trivial clause at beginning of program
      5 name: trivial_clause_load_flat4_x1
      6 
      7 body: |
      8   bb.0:
      9     ; GCN-LABEL: name: trivial_clause_load_flat4_x1
     10     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
     11     ; GCN-NEXT: S_ENDPGM
     12 
     13     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
     14     S_ENDPGM
     15 ...
     16 ---
     17 # Trivial clause at beginning of program
     18 name: trivial_clause_load_flat4_x2
     19 
     20 body: |
     21   bb.0:
     22     ; GCN-LABEL: name: trivial_clause_load_flat4_x2
     23     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
     24     ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
     25     ; GCN-NEXT: S_ENDPGM
     26 
     27     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
     28     $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
     29     S_ENDPGM
     30 ...
     31 ---
     32 # Trivial clause at beginning of program
     33 name: trivial_clause_load_flat4_x3
     34 
     35 body: |
     36   bb.0:
     37     ; GCN-LABEL: name: trivial_clause_load_flat4_x3
     38     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
     39     ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
     40     ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr
     41     ; GCN-NEXT: S_ENDPGM
     42 
     43     $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
     44     $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
     45     $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr
     46     S_ENDPGM
     47 ...
     48 ---
     49 # Trivial clause at beginning of program
     50 name: trivial_clause_load_flat4_x4
     51 
     52 body: |
     53   bb.0:
     54     ; GCN-LABEL: name: trivial_clause_load_flat4_x4
     55     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
     56     ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
     57     ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr
     58     ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr
     59     ; GCN-NEXT: S_ENDPGM
     60 
     61     $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
     62     $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
     63     $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr
     64     $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr
     65     S_ENDPGM
     66 ...
     67 ---
     68 # Reuse of same input pointer is OK
     69 
     70 name: trivial_clause_load_flat4_x2_sameptr
     71 body: |
     72   bb.0:
     73     ; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
     74     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
     75     ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
     76     ; GCN-NEXT: S_ENDPGM
     77 
     78     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
     79     $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
     80     S_ENDPGM
     81 ...
     82 ---
     83 # 32-bit load partially clobbers its own ptr reg
     84 name: flat_load4_overwrite_ptr_lo
     85 
     86 body: |
     87   bb.0:
     88     ; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
     89     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
     90     ; GCN-NEXT: S_ENDPGM
     91 
     92     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
     93     S_ENDPGM
     94 ...
     95 ---
     96 # 32-bit load partially clobbers its own ptr reg
     97 name: flat_load4_overwrite_ptr_hi
     98 
     99 body: |
    100   bb.0:
    101     ; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
    102     ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    103     ; GCN-NEXT: S_ENDPGM
    104 
    105     $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    106     S_ENDPGM
    107 ...
    108 ---
    109 # 64-bit load clobbers its own ptr reg
    110 name: flat_load8_overwrite_ptr
    111 
    112 body: |
    113   bb.0:
    114     ; GCN-LABEL: name: flat_load8_overwrite_ptr
    115     ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    116     ; GCN-NEXT: S_ENDPGM
    117 
    118     $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    119     S_ENDPGM
    120 ...
    121 ---
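    122 # vmcnt has 4 bits, so there are at most 16 outstanding loads. In a full
    123 # compile the waitcnt breaks the clause; no waitcnt is present in this test,
    124 # so the 17th load (which overwrites $vgpr0) still gets an S_NOP with XNACK.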
    125 
    126 name: break_clause_at_max_clause_size_flat_load4
    127 
    128 body: |
    129   bb.0:
    130     ; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
    131     ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    132     ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    133     ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    134     ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    135     ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    136     ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    137     ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    138     ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    139     ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    140     ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    141     ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    142     ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    143     ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    144     ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    145     ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    146     ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    147     ; XNACK-NEXT: S_NOP 0
    148     ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    149     ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
    150     ; GCN-NEXT: S_ENDPGM
    151 
    152     $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    153     $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    154     $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    155     $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    156 
    157     $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    158     $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    159     $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    160     $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    161 
    162     $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    163     $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    164     $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    165     $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    166 
    167     $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    168     $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    169     $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    170     $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
    171 
    172     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    173     $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
    174     S_ENDPGM
    175 ...
    176 ---
    177 # The second load overwrites the low half ($vgpr2) of the first load's pointer.
    178 name: break_clause_simple_load_flat4_lo_ptr
    179 
    180 body: |
    181   bb.0:
    182     ; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
    183     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    184     ; XNACK-NEXT: S_NOP 0
    185     ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    186     ; GCN-NEXT: S_ENDPGM
    187 
    188     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    189     $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    190     S_ENDPGM
    191 ...
    192 ---
    193 # The second load overwrites the high half ($vgpr3) of the first load's pointer.
    194 name: break_clause_simple_load_flat4_hi_ptr
    195 
    196 body: |
    197   bb.0:
    198     ; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
    199     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    200     ; XNACK-NEXT: S_NOP 0
    201     ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    202     ; GCN-NEXT: S_ENDPGM
    203 
    204     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    205     $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    206     S_ENDPGM
    207 ...
    208 ---
    209 # The second 64-bit load overwrites the whole pointer of the first load.
    210 name: break_clause_simple_load_flat8_ptr
    211 
    212 body: |
    213   bb.0:
    214     ; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
    215     ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    216     ; XNACK-NEXT: S_NOP 0
    217     ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    218     ; GCN-NEXT: S_ENDPGM
    219 
    220     $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    221     $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    222     S_ENDPGM
    223 ...
    224 ---
    225 # The 128-bit load's destination overlaps the pointer of the preceding load.
    226 
    227 name: break_clause_simple_load_flat16_ptr
    228 
    229 body: |
    230   bb.0:
    231     ; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
    232     ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    233     ; XNACK-NEXT: S_NOP 0
    234     ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
    235     ; GCN-NEXT: S_ENDPGM
    236     $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    237     $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
    238     S_ENDPGM
    239 ...
    240 ---
    241 
    242 # FIXME: The clause should be broken by the waitcnt inserted at the end of
    243 # the block, so no nop should be needed, but the checks below still expect
    244 # an S_NOP at the start of bb.1 with XNACK.
    245 
    246 name: break_clause_block_boundary_load_flat8_ptr
    247 
    248 body: |
    249   ; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
    250   ; GCN: bb.0:
    251   ; GCN-NEXT:   successors: %bb.1(0x80000000)
    252   ; GCN:   $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    253   ; GCN: bb.1:
    254   ; XNACK-NEXT:  S_NOP 0
    255   ; GCN-NEXT:   $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    256   ; GCN-NEXT:   S_ENDPGM
    257 
    258   bb.0:
    259     $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    260 
    261   bb.1:
    262     $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    263     S_ENDPGM
    264 ...
    265 ---
    266 # The load clobbers the pointer of the store, but no nop is expected here.
    267 
    268 name: break_clause_store_load_into_ptr_flat4
    269 
    270 body: |
    271   bb.0:
    272     ; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
    273     ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    274     ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    275     ; GCN-NEXT: S_ENDPGM
    276 
    277     FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    278     $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    279     S_ENDPGM
    280 ...
    281 ---
    282 # The load clobbers the data of the store, but no nop is expected here.
    283 # FIXME: Would it be better to s_nop and wait later?
    284 
    285 name: break_clause_store_load_into_data_flat4
    286 
    287 body: |
    288   bb.0:
    289     ; GCN-LABEL: name: break_clause_store_load_into_data_flat4
    290     ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    291     ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    292     ; GCN-NEXT: S_ENDPGM
    293 
    294     FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    295     $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    296     S_ENDPGM
    297 ...
    298 ---
    299 # FIXME: A regular VALU instruction should break the clause so that no nop
    300 # is needed, but the checks below still expect an S_NOP with XNACK.
    301 name: valu_inst_breaks_clause
    302 
    303 body: |
    304   bb.0:
    305     ; GCN-LABEL: name: valu_inst_breaks_clause
    306     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    307     ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    308     ; XNACK-NEXT: S_NOP 0
    309     ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    310     ; GCN-NEXT: S_ENDPGM
    311 
    312     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    313     $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    314     $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    315     S_ENDPGM
    316 ...
    317 ---
    318 # FIXME: A regular SALU instruction should break the clause so that no nop
    319 # is needed, but the checks below still expect an S_NOP with XNACK.
    320 name: salu_inst_breaks_clause
    321 
    322 body: |
    323   bb.0:
    324     ; GCN-LABEL: name: salu_inst_breaks_clause
    325     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    326     ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
    327     ; XNACK-NEXT: S_NOP 0
    328     ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    329     ; GCN-NEXT: S_ENDPGM
    330 
    331     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    332     $sgpr8 = S_MOV_B32 0
    333     $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    334     S_ENDPGM
    335 ...
    336 ---
    337 # A DS instruction sits between the two loads; an S_NOP is still expected with XNACK.
    338 name: ds_inst_breaks_clause
    339 
    340 body: |
    341   bb.0:
    342     ; GCN-LABEL: name: ds_inst_breaks_clause
    343     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    344     ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    345     ; XNACK-NEXT: S_NOP 0
    346     ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    347     ; GCN-NEXT: S_ENDPGM
    348 
    349     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    350     $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    351     $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    352     S_ENDPGM
    353 ...
    354 ---
    355 # An SMRD instruction sits between the two loads; no nop is expected.
    356 name: smrd_inst_breaks_clause
    357 
    358 body: |
    359   bb.0:
    360     ; GCN-LABEL: name: smrd_inst_breaks_clause
    361     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    362     ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    363     ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    364     ; GCN-NEXT: S_ENDPGM
    365 
    366     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    367     $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    368     $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    369     S_ENDPGM
    370 ...
    371 ---
    372 # The second load overwrites a register that the first load only uses implicitly. FIXME: Should this be handled?
    373 name: implicit_use_breaks_clause
    374 
    375 body: |
    376   bb.0:
    377     ; GCN-LABEL: name: implicit_use_breaks_clause
    378     ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
    379     ; XNACK-NEXT: S_NOP 0
    380     ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
    381     ; GCN-NEXT: S_ENDPGM
    382 
    383     $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
    384     $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
    385     S_ENDPGM
    386 ...
    387 ---
    388 name: trivial_clause_load_mubuf4_x2
    389 # Two buffer loads with no register overlap; no nop is expected.
    390 body: |
    391   bb.0:
    392     ; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
    393     ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    394     ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    395     ; GCN-NEXT: S_ENDPGM
    396 
    397     $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    398     $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    399     S_ENDPGM
    400 ...
    401 ---
    402 name: break_clause_simple_load_mubuf_offen_ptr
    403 # The second buffer load overwrites $vgpr2, which the first one reads.
    404 body: |
    405   bb.0:
    406     ; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
    407     ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    408     ; XNACK-NEXT: S_NOP 0
    409     ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    410     ; GCN-NEXT: S_ENDPGM
    411 
    412     $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    413     $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    414     S_ENDPGM
    415 ...
    416 ---
    417 # BUFFER instructions overwriting their own inputs are supposedly OK.
    418 
    419 name: mubuf_load4_overwrite_ptr
    420 
    421 body: |
    422   bb.0:
    423     ; GCN-LABEL: name: mubuf_load4_overwrite_ptr
    424     ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    425     ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
    426     ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
    427     ; GCN-NEXT: S_ENDPGM
    428     $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    429     $vgpr1 = V_MOV_B32_e32 0, implicit $exec
    430     $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
    431     S_ENDPGM
    432 ...
    433 ---
    434 # Break a clause from interference between mubuf and flat instructions
    435 
    436 name: break_clause_flat_load_mubuf_load
    437 
    438 body: |
    439   bb.0:
    440     ; GCN-LABEL: name: break_clause_flat_load_mubuf_load
    441     ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    442     ; XNACK-NEXT: S_NOP 0
    443     ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    444     ; GCN-NEXT: S_ENDPGM
    445 
    446     $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    447     $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    448     S_ENDPGM
    449 ...
    450 ---
    451 # Break a clause from interference between mubuf and flat instructions
    452 # GCN-LABEL: name: break_clause_mubuf_load_flat_load
    453 # GCN: bb.0:
    454 # GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
    455 # XNACK-NEXT: S_NOP 0
    456 # GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3
    457 # GCN-NEXT: S_ENDPGM
    458 name: break_clause_mubuf_load_flat_load
    459 
    460 body: |
    461   bb.0:
    462     $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    463     $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    464 
    465     S_ENDPGM
    466 ...
    467 ---
    468 # The returning atomic overwrites $vgpr4, half of the preceding load's pointer.
    469 name: break_clause_atomic_rtn_into_ptr_flat4
    470 
    471 body: |
    472   bb.0:
    473     ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
    474     ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    475     ; XNACK-NEXT: S_NOP 0
    476     ; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    477     ; GCN-NEXT: S_ENDPGM
    478 
    479     $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    480     $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    481     S_ENDPGM
    482 ...
    483 ---
    484 name: break_clause_atomic_nortn_ptr_load_flat4
    485 # The load overwrites $vgpr2, which the non-returning atomic reads; no nop is expected.
    486 body: |
    487   bb.0:
    488     ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
    489     ; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
    490     ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
    491     ; GCN-NEXT: S_ENDPGM
    492 
    493     FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
    494     $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
    495     S_ENDPGM
    496 ...
    497 ---
    498 # The returning buffer atomic overwrites $vgpr2, which the preceding load reads.
    499 name: break_clause_atomic_rtn_into_ptr_mubuf4
    500 
    501 body: |
    502   bb.0:
    503     ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
    504     ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    505     ; XNACK-NEXT: S_NOP 0
    506     ; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
    507     ; GCN-NEXT: S_ENDPGM
    508 
    509     $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    510     $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
    511     S_ENDPGM
    512 ...
    513 ---
    514 # The load overwrites $vgpr1, which the non-returning buffer atomic reads; no nop is expected.
    515 name: break_clause_atomic_nortn_ptr_load_mubuf4
    516 
    517 body: |
    518   bb.0:
    519     ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
    520     ; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
    521     ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    522     ; GCN-NEXT: S_ENDPGM
    523 
    524     BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
    525     $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    526     S_ENDPGM
    527 ...
    528 ---
    529 # Make sure there is no assert on mubuf instructions which do not have
    530 # vaddr, and don't add register to track.
    531 name: no_break_clause_mubuf_load_novaddr
    532 
    533 body: |
    534   bb.0:
    535     ; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
    536     ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    537     ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    538     ; GCN-NEXT: S_ENDPGM
    539     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    540     $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    541     S_ENDPGM
    542 ...
    543 ---
    544 # Loads and stores using different addresses theoretically do not need a
    545 # nop, but an S_NOP is still expected before the second store with XNACK.
    546 name: mix_load_store_clause
    547 body: |
    548   bb.0:
    549     ; GCN-LABEL: name: mix_load_store_clause
    550     ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    551     ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    552     ; XNACK-NEXT: S_NOP 0
    553     ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
    554     ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    555     ; GCN-NEXT: S_ENDPGM
    556     FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    557     $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    558     FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
    559     $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    560     S_ENDPGM
    561 ...
    562 ---
    563 # Loads and stores using the same address need a nop.
    564 
    565 name: mix_load_store_clause_same_address
    566 body: |
    567   bb.0:
    568     ; GCN-LABEL: name: mix_load_store_clause_same_address
    569     ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    570     ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    571     ; XNACK-NEXT: S_NOP 0
    572     ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
    573     ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    574     ; GCN-NEXT: S_ENDPGM
    575     FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    576     $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    577     FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
    578     $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    579     S_ENDPGM
    580 ...
    581