# sdwa-scalar-ops.mir — exercises the si-peephole-sdwa pass on AMDGPU (VI vs. GFX9).
# RUN: llc -march=amdgcn -mcpu=fiji -start-before si-peephole-sdwa -o - %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s

# GCN-LABEL: {{^}}sdwa_imm_operand:
# GCN: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 2
# GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 2
# GCN: BB0_1:
# GCN: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
# GCN: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1

# GCN-LABEL: {{^}}sdwa_sgpr_operand:
# VI: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 2
# VI-NOT: v_mov_b32_e32 v{{[0-9]+}}, 2
# VI: BB1_1:
# VI: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
# VI: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1

# GFX9: s_mov_b32 s[[SHIFT:[0-9]+]], 2
# GFX9-NOT: v_mov_b32_e32 v{{[0-9]+}}, 2
# GFX9: BB1_1:
# GFX9: v_lshlrev_b32_sdwa v{{[0-9]+}}, s[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
# GFX9: v_lshlrev_b32_sdwa v{{[0-9]+}}, s[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
     24 --- |
     25   ; ModuleID = 'sdwa-scalar-ops.opt.ll'
     26   source_filename = "sdwa-scalar-ops.opt.ll"
     27   target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
     28 
     29   define amdgpu_kernel void @sdwa_imm_operand(i32 addrspace(1)* nocapture %arg) {
     30   bb:
     31     br label %bb2
     32 
     33   bb1:                                              ; preds = %bb2
     34     ret void
     35 
     36   bb2:                                              ; preds = %bb2, %bb
     37     %lsr.iv = phi i64 [ %lsr.iv.next, %bb2 ], [ 0, %bb ]
     38     %bc = bitcast i32 addrspace(1)* %arg to i8 addrspace(1)*
     39     %uglygep4 = getelementptr i8, i8 addrspace(1)* %bc, i64 %lsr.iv
     40     %uglygep45 = bitcast i8 addrspace(1)* %uglygep4 to i32 addrspace(1)*
     41     %tmp5 = load i32, i32 addrspace(1)* %uglygep45, align 4
     42     %tmp6 = lshr i32 %tmp5, 8
     43     %tmp7 = and i32 %tmp6, 255
     44     %tmp8 = zext i32 %tmp7 to i64
     45     %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp8
     46     store i32 1, i32 addrspace(1)* %tmp9, align 4
     47     %scevgep = getelementptr i32, i32 addrspace(1)* %uglygep45, i64 1
     48     %tmp13 = load i32, i32 addrspace(1)* %scevgep, align 4
     49     %tmp14 = lshr i32 %tmp13, 8
     50     %tmp15 = and i32 %tmp14, 255
     51     %tmp16 = zext i32 %tmp15 to i64
     52     %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp16
     53     store i32 1, i32 addrspace(1)* %tmp17, align 4
     54     %lsr.iv.next = add nuw nsw i64 %lsr.iv, 8
     55     %tmp1 = trunc i64 %lsr.iv.next to i32
     56     %tmp19 = icmp eq i32 %tmp1, 4096
     57     br i1 %tmp19, label %bb1, label %bb2
     58   }
     59 
     60   define amdgpu_kernel void @sdwa_sgpr_operand(i32 addrspace(1)* nocapture %arg) {
     61   bb:
     62     br label %bb2
     63 
     64   bb1:                                              ; preds = %bb2
     65     ret void
     66 
     67   bb2:                                              ; preds = %bb2, %bb
     68     %lsr.iv = phi i64 [ %lsr.iv.next, %bb2 ], [ 0, %bb ]
     69     %bc = bitcast i32 addrspace(1)* %arg to i8 addrspace(1)*
     70     %uglygep4 = getelementptr i8, i8 addrspace(1)* %bc, i64 %lsr.iv
     71     %uglygep45 = bitcast i8 addrspace(1)* %uglygep4 to i32 addrspace(1)*
     72     %tmp5 = load i32, i32 addrspace(1)* %uglygep45, align 4
     73     %tmp6 = lshr i32 %tmp5, 8
     74     %tmp7 = and i32 %tmp6, 255
     75     %tmp8 = zext i32 %tmp7 to i64
     76     %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp8
     77     store i32 1, i32 addrspace(1)* %tmp9, align 4
     78     %scevgep = getelementptr i32, i32 addrspace(1)* %uglygep45, i64 1
     79     %tmp13 = load i32, i32 addrspace(1)* %scevgep, align 4
     80     %tmp14 = lshr i32 %tmp13, 8
     81     %tmp15 = and i32 %tmp14, 255
     82     %tmp16 = zext i32 %tmp15 to i64
     83     %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp16
     84     store i32 1, i32 addrspace(1)* %tmp17, align 4
     85     %lsr.iv.next = add nuw nsw i64 %lsr.iv, 8
     86     %tmp1 = trunc i64 %lsr.iv.next to i32
     87     %tmp19 = icmp eq i32 %tmp1, 4096
     88     br i1 %tmp19, label %bb1, label %bb2
     89   }
     90 
     91 ...
     92 ---
     93 name:            sdwa_imm_operand
     94 alignment:       0
     95 exposesReturnsTwice: false
     96 legalized:       false
     97 regBankSelected: false
     98 selected:        false
     99 tracksRegLiveness: true
    100 registers:
    101   - { id: 0, class: sreg_64 }
    102   - { id: 1, class: sreg_64 }
    103   - { id: 2, class: vgpr_32 }
    104   - { id: 3, class: sgpr_128 }
    105   - { id: 4, class: sgpr_64 }
    106   - { id: 5, class: sreg_32_xm0 }
    107   - { id: 6, class: sgpr_32 }
    108   - { id: 7, class: sreg_64 }
    109   - { id: 8, class: sreg_64 }
    110   - { id: 9, class: sreg_64_xexec }
    111   - { id: 10, class: sreg_32_xm0 }
    112   - { id: 11, class: sreg_32_xm0 }
    113   - { id: 12, class: sreg_32_xm0 }
    114   - { id: 13, class: sreg_32_xm0 }
    115   - { id: 14, class: sreg_32_xm0 }
    116   - { id: 15, class: sreg_32_xm0 }
    117   - { id: 16, class: sreg_64 }
    118   - { id: 17, class: vgpr_32 }
    119   - { id: 18, class: vreg_64 }
    120   - { id: 19, class: sreg_32_xm0 }
    121   - { id: 20, class: sreg_32 }
    122   - { id: 21, class: sreg_32_xm0 }
    123   - { id: 22, class: sreg_32_xm0 }
    124   - { id: 23, class: sreg_32_xm0 }
    125   - { id: 24, class: sreg_64 }
    126   - { id: 25, class: sreg_32_xm0 }
    127   - { id: 26, class: sreg_32_xm0 }
    128   - { id: 27, class: sreg_32_xm0 }
    129   - { id: 28, class: sreg_32_xm0 }
    130   - { id: 29, class: sreg_64 }
    131   - { id: 30, class: vgpr_32 }
    132   - { id: 31, class: vreg_64 }
    133   - { id: 32, class: sreg_32_xm0 }
    134   - { id: 33, class: sreg_32_xm0 }
    135   - { id: 34, class: sreg_64 }
    136   - { id: 35, class: sreg_32_xm0 }
    137   - { id: 36, class: sreg_32_xm0 }
    138   - { id: 37, class: sreg_32_xm0 }
    139   - { id: 38, class: sreg_32_xm0 }
    140   - { id: 39, class: vreg_64 }
    141   - { id: 40, class: vgpr_32 }
    142   - { id: 41, class: vreg_64 }
    143   - { id: 42, class: sreg_32_xm0 }
    144   - { id: 43, class: sreg_32 }
    145   - { id: 44, class: sreg_32_xm0 }
    146   - { id: 45, class: sreg_64 }
    147   - { id: 46, class: sreg_32_xm0 }
    148   - { id: 47, class: sreg_32_xm0 }
    149   - { id: 48, class: sreg_32_xm0 }
    150   - { id: 49, class: sreg_32_xm0 }
    151   - { id: 50, class: sreg_64 }
    152   - { id: 51, class: vreg_64 }
    153   - { id: 52, class: sreg_64 }
    154   - { id: 53, class: sreg_32_xm0 }
    155   - { id: 54, class: sreg_32_xm0 }
    156   - { id: 55, class: sreg_32_xm0 }
    157   - { id: 56, class: sreg_32_xm0 }
    158   - { id: 57, class: sreg_64 }
    159   - { id: 58, class: sreg_32_xm0 }
    160   - { id: 59, class: sreg_32_xm0 }
    161   - { id: 60, class: vgpr_32 }
    162   - { id: 61, class: vgpr_32 }
    163   - { id: 62, class: vreg_64 }
    164   - { id: 63, class: vgpr_32 }
    165   - { id: 64, class: vgpr_32 }
    166   - { id: 65, class: vgpr_32 }
    167   - { id: 66, class: vgpr_32 }
    168   - { id: 67, class: vreg_64 }
    169   - { id: 68, class: vgpr_32 }
    170   - { id: 69, class: vgpr_32 }
    171   - { id: 70, class: vgpr_32 }
    172   - { id: 71, class: vgpr_32 }
    173   - { id: 72, class: vgpr_32 }
    174   - { id: 73, class: vgpr_32 }
    175   - { id: 74, class: vgpr_32 }
    176   - { id: 75, class: vreg_64 }
    177   - { id: 76, class: vgpr_32 }
    178   - { id: 77, class: vgpr_32 }
    179   - { id: 78, class: vgpr_32 }
    180   - { id: 79, class: vgpr_32 }
    181   - { id: 80, class: vreg_64 }
    182   - { id: 81, class: vgpr_32 }
    183   - { id: 82, class: vgpr_32 }
    184   - { id: 83, class: vgpr_32 }
    185 liveins:
    186   - { reg: '$sgpr4_sgpr5', virtual-reg: '%4' }
    187 frameInfo:
    188   isFrameAddressTaken: false
    189   isReturnAddressTaken: false
    190   hasStackMap:     false
    191   hasPatchPoint:   false
    192   stackSize:       0
    193   offsetAdjustment: 0
    194   maxAlignment:    0
    195   adjustsStack:    false
    196   hasCalls:        false
    197   hasOpaqueSPAdjustment: false
    198   hasVAStart:      false
    199   hasMustTailInVarArgFunc: false
    200 body:             |
    201   bb.0.bb:
    202     successors: %bb.2.bb2(0x80000000)
    203     liveins: $sgpr4_sgpr5
    204 
    205     %4 = COPY $sgpr4_sgpr5
    206     %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    207     %8 = S_MOV_B64 0
    208     %7 = COPY %9
    209     %30 = V_MOV_B32_e32 1, implicit $exec
    210     S_BRANCH %bb.2.bb2
    211 
    212   bb.1.bb1:
    213     S_ENDPGM
    214 
    215   bb.2.bb2:
    216     successors: %bb.1.bb1(0x04000000), %bb.2.bb2(0x7c000000)
    217 
    218     %0 = PHI %8, %bb.0.bb, %1, %bb.2.bb2
    219     %13 = COPY %7.sub1
    220     %14 = S_ADD_U32 %7.sub0, %0.sub0, implicit-def $scc
    221     %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc
    222     %16 = REG_SEQUENCE %14, 1, %15, 2
    223     %18 = COPY %16
    224     %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
    225     %60 = V_BFE_U32 %17, 8, 8, implicit $exec
    226     %61 = V_LSHLREV_B32_e32 2, killed %60, implicit $exec
    227     %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec
    228     %66 = COPY %13
    229     %65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
    230     %67 = REG_SEQUENCE %70, 1, killed %65, 2
    231     FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
    232     %37 = S_ADD_U32 %14, 4, implicit-def $scc
    233     %38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc
    234     %71 = COPY killed %37
    235     %72 = COPY killed %38
    236     %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
    237     %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
    238     %73 = V_BFE_U32 %40, 8, 8, implicit $exec
    239     %74 = V_LSHLREV_B32_e32 2, killed %73, implicit $exec
    240     %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec
    241     %78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
    242     %80 = REG_SEQUENCE %83, 1, killed %78, 2
    243     FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
    244     %55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc
    245     %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc
    246     %57 = REG_SEQUENCE %55, 1, killed %56, 2
    247     %1 = COPY %57
    248     S_CMPK_EQ_I32 %55, 4096, implicit-def $scc
    249     S_CBRANCH_SCC1 %bb.1.bb1, implicit $scc
    250     S_BRANCH %bb.2.bb2
    251 
    252 ...
    253 ---
    254 name:            sdwa_sgpr_operand
    255 alignment:       0
    256 exposesReturnsTwice: false
    257 legalized:       false
    258 regBankSelected: false
    259 selected:        false
    260 tracksRegLiveness: true
    261 registers:
    262   - { id: 0, class: sreg_64 }
    263   - { id: 1, class: sreg_64 }
    264   - { id: 2, class: vgpr_32 }
    265   - { id: 3, class: sgpr_128 }
    266   - { id: 4, class: sgpr_64 }
    267   - { id: 5, class: sreg_32_xm0 }
    268   - { id: 6, class: sgpr_32 }
    269   - { id: 7, class: sreg_64 }
    270   - { id: 8, class: sreg_64 }
    271   - { id: 9, class: sreg_64_xexec }
    272   - { id: 10, class: sreg_32_xm0 }
    273   - { id: 11, class: sreg_32_xm0 }
    274   - { id: 12, class: sreg_32_xm0 }
    275   - { id: 13, class: sreg_32_xm0 }
    276   - { id: 14, class: sreg_32_xm0 }
    277   - { id: 15, class: sreg_32_xm0 }
    278   - { id: 16, class: sreg_64 }
    279   - { id: 17, class: vgpr_32 }
    280   - { id: 18, class: vreg_64 }
    281   - { id: 19, class: sreg_32_xm0 }
    282   - { id: 20, class: sreg_32 }
    283   - { id: 21, class: sreg_32_xm0 }
    284   - { id: 22, class: sreg_32_xm0 }
    285   - { id: 23, class: sreg_32_xm0 }
    286   - { id: 24, class: sreg_64 }
    287   - { id: 25, class: sreg_32_xm0 }
    288   - { id: 26, class: sreg_32_xm0 }
    289   - { id: 27, class: sreg_32_xm0 }
    290   - { id: 28, class: sreg_32_xm0 }
    291   - { id: 29, class: sreg_64 }
    292   - { id: 30, class: vgpr_32 }
    293   - { id: 31, class: vreg_64 }
    294   - { id: 32, class: sreg_32_xm0 }
    295   - { id: 33, class: sreg_32_xm0 }
    296   - { id: 34, class: sreg_64 }
    297   - { id: 35, class: sreg_32_xm0 }
    298   - { id: 36, class: sreg_32_xm0 }
    299   - { id: 37, class: sreg_32_xm0 }
    300   - { id: 38, class: sreg_32_xm0 }
    301   - { id: 39, class: vreg_64 }
    302   - { id: 40, class: vgpr_32 }
    303   - { id: 41, class: vreg_64 }
    304   - { id: 42, class: sreg_32_xm0 }
    305   - { id: 43, class: sreg_32 }
    306   - { id: 44, class: sreg_32_xm0 }
    307   - { id: 45, class: sreg_64 }
    308   - { id: 46, class: sreg_32_xm0 }
    309   - { id: 47, class: sreg_32_xm0 }
    310   - { id: 48, class: sreg_32_xm0 }
    311   - { id: 49, class: sreg_32_xm0 }
    312   - { id: 50, class: sreg_64 }
    313   - { id: 51, class: vreg_64 }
    314   - { id: 52, class: sreg_64 }
    315   - { id: 53, class: sreg_32_xm0 }
    316   - { id: 54, class: sreg_32_xm0 }
    317   - { id: 55, class: sreg_32_xm0 }
    318   - { id: 56, class: sreg_32_xm0 }
    319   - { id: 57, class: sreg_64 }
    320   - { id: 58, class: sreg_32_xm0 }
    321   - { id: 59, class: sreg_32_xm0 }
    322   - { id: 60, class: vgpr_32 }
    323   - { id: 61, class: vgpr_32 }
    324   - { id: 62, class: vreg_64 }
    325   - { id: 63, class: vgpr_32 }
    326   - { id: 64, class: vgpr_32 }
    327   - { id: 65, class: vgpr_32 }
    328   - { id: 66, class: vgpr_32 }
    329   - { id: 67, class: vreg_64 }
    330   - { id: 68, class: vgpr_32 }
    331   - { id: 69, class: vgpr_32 }
    332   - { id: 70, class: vgpr_32 }
    333   - { id: 71, class: vgpr_32 }
    334   - { id: 72, class: vgpr_32 }
    335   - { id: 73, class: vgpr_32 }
    336   - { id: 74, class: vgpr_32 }
    337   - { id: 75, class: vreg_64 }
    338   - { id: 76, class: vgpr_32 }
    339   - { id: 77, class: vgpr_32 }
    340   - { id: 78, class: vgpr_32 }
    341   - { id: 79, class: vgpr_32 }
    342   - { id: 80, class: vreg_64 }
    343   - { id: 81, class: vgpr_32 }
    344   - { id: 82, class: vgpr_32 }
    345   - { id: 83, class: vgpr_32 }
    346   - { id: 84, class: sreg_32_xm0 }
    347 liveins:
    348   - { reg: '$sgpr4_sgpr5', virtual-reg: '%4' }
    349 frameInfo:
    350   isFrameAddressTaken: false
    351   isReturnAddressTaken: false
    352   hasStackMap:     false
    353   hasPatchPoint:   false
    354   stackSize:       0
    355   offsetAdjustment: 0
    356   maxAlignment:    0
    357   adjustsStack:    false
    358   hasCalls:        false
    359   hasOpaqueSPAdjustment: false
    360   hasVAStart:      false
    361   hasMustTailInVarArgFunc: false
    362 body:             |
    363   bb.0.bb:
    364     successors: %bb.2.bb2(0x80000000)
    365     liveins: $sgpr4_sgpr5
    366 
    367     %4 = COPY $sgpr4_sgpr5
    368     %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    369     %8 = S_MOV_B64 0
    370     %7 = COPY %9
    371     %30 = V_MOV_B32_e32 1, implicit $exec
    372     %84 = S_MOV_B32 2
    373     S_BRANCH %bb.2.bb2
    374 
    375   bb.1.bb1:
    376     S_ENDPGM
    377 
    378   bb.2.bb2:
    379     successors: %bb.1.bb1(0x04000000), %bb.2.bb2(0x7c000000)
    380 
    381     %0 = PHI %8, %bb.0.bb, %1, %bb.2.bb2
    382     %13 = COPY %7.sub1
    383     %14 = S_ADD_U32 %7.sub0, %0.sub0, implicit-def $scc
    384     %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc
    385     %16 = REG_SEQUENCE %14, 1, %15, 2
    386     %18 = COPY %16
    387     %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
    388     %60 = V_BFE_U32 %17, 8, 8, implicit $exec
    389     %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit $exec
    390     %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec
    391     %66 = COPY %13
    392     %65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
    393     %67 = REG_SEQUENCE %70, 1, killed %65, 2
    394     FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
    395     %37 = S_ADD_U32 %14, 4, implicit-def $scc
    396     %38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc
    397     %71 = COPY killed %37
    398     %72 = COPY killed %38
    399     %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
    400     %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
    401     %73 = V_BFE_U32 %40, 8, 8, implicit $exec
    402     %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit $exec
    403     %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec
    404     %78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
    405     %80 = REG_SEQUENCE %83, 1, killed %78, 2
    406     FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
    407     %55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc
    408     %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc
    409     %57 = REG_SEQUENCE %55, 1, killed %56, 2
    410     %1 = COPY %57
    411     S_CMPK_EQ_I32 %55, 4096, implicit-def $scc
    412     S_CBRANCH_SCC1 %bb.1.bb1, implicit $scc
    413     S_BRANCH %bb.2.bb2
    414 
    415 ...
    416