Home | History | Annotate | Download | only in AMDGPU
      1 # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-lower-control-flow -o - %s | FileCheck %s
      2 # Getting an undef that is specifically a VGPR is tricky from IR
      3 
      4 # CHECK-LABEL: name: extract_undef_offset_vgpr{{$}}
      5 # CHECK: bb.1:
      6 # CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
      7 # CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
      8 
      9 # CHECK: V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
     10 # CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
     11 # CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
     12 
     13 # CHECK: bb.2:
     14 # CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
     15 
     16 
     17 --- |
     18   target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
     19 
     20   define void @extract_undef_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { ; IR-level counterpart, by name, of the MIR function extract_undef_offset_vgpr
     21   entry:
     22     %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in ; volatile load of the source vector
     23     %value = extractelement <4 x i32> %ld, i32 undef ; the element index is undef
     24     store i32 %value, i32 addrspace(1)* %out ; write the extracted element to %out
     25     ret void
     26   }
     27 
     28   define void @extract_undef_neg_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { ; IR-level counterpart, by name, of the MIR function extract_undef_neg_offset_vgpr
     29   entry:
     30     %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in ; volatile load of the source vector
     31     %value = extractelement <4 x i32> %ld, i32 undef ; index is undef; the -7 offset exists only in the MIR body of the same name
     32     store i32 %value, i32 addrspace(1)* %out ; write the extracted element to %out
     33     ret void
     34   }
     35 
     36   define void @insert_undef_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { ; IR-level counterpart, by name, of the MIR function insert_undef_offset_vgpr
     37   entry:
     38     %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in ; the MIR memory operand "load 16 from %ir.in" names this pointer
     39     %value = insertelement <4 x i32> %ld, i32 5, i32 undef ; insert the constant 5 at an undef index
     40     store <4 x i32> %value, <4 x i32> addrspace(1)* %out ; the MIR memory operand "store 16 into %ir.out" names this pointer
     41     ret void
     42   }
     43 
     44   define void @insert_undef_neg_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { ; IR-level counterpart, by name, of the MIR function insert_undef_neg_offset_vgpr
     45   entry:
     46     %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in ; the MIR memory operand "load 16 from %ir.in" names this pointer
     47     %value = insertelement <4 x i32> %ld, i32 5, i32 undef ; constant 5 at an undef index; the -7 offset exists only in the MIR body of the same name
     48     store <4 x i32> %value, <4 x i32> addrspace(1)* %out ; the MIR memory operand "store 16 into %ir.out" names this pointer
     49     ret void
     50   }
     51 
     52   define void @insert_undef_value_offset_vgpr(<4 x i32> addrspace(1)*%out, <4 x i32> addrspace(1)* %in, i32 %idx) { ; IR-level counterpart, by name, of the MIR function insert_undef_value_offset_vgpr
     53   entry:
     54     %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in ; the MIR memory operand "load 16 from %ir.in" names this pointer
     55     %value = insertelement <4 x i32> %ld, i32 undef, i32 %idx ; here the inserted value is undef and the index is the %idx argument
     56     store <4 x i32> %value, <4 x i32> addrspace(1)* %out ; the MIR memory operand "store 16 into %ir.out" names this pointer
     57     ret void
     58   }
     59 
     60 ...
     61 ---
     62 name:            extract_undef_offset_vgpr # MIR input: SI_INDIRECT_SRC_V4 with an undef VGPR index and offset 0
     63 alignment:       0
     64 exposesReturnsTwice: false
     65 hasInlineAsm:    false
     66 allVRegsAllocated: true
     67 isSSA:           false
     68 tracksRegLiveness: true # the CHECK lines for this function match liveins lists on bb.1 and bb.2
     69 tracksSubRegLiveness: true
     70 liveins:
     71   - { reg: '%sgpr0_sgpr1' }
     72 frameInfo:
     73   isFrameAddressTaken: false
     74   isReturnAddressTaken: false
     75   hasStackMap:     false
     76   hasPatchPoint:   false
     77   stackSize:       0
     78   offsetAdjustment: 0
     79   maxAlignment:    0
     80   adjustsStack:    false
     81   hasCalls:        false
     82   maxCallFrameSize: 0
     83   hasOpaqueSPAdjustment: false
     84   hasVAStart:      false
     85   hasMustTailInVarArgFunc: false
     86 body:             |
     87   bb.0.entry:
     88     liveins: %sgpr0_sgpr1
     89 
     90     %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 ; low half of the %sgpr4..7 operand used by the buffer load
     91     %sgpr7 = S_MOV_B32 61440
     92     %sgpr6 = S_MOV_B32 -1
     93     S_WAITCNT 127
     94     %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec ; loads the 4-dword vector that is indexed below
     95     %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 ; %sgpr4_sgpr5 is overwritten; the new value is used by the store
     96     S_WAITCNT 3952
     97     %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec ; pseudo under test: the CHECK lines expect the undef flag on %vgpr10 to carry over to V_READFIRSTLANE_B32
     98     S_WAITCNT 127
     99     BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec ; stores the selected element
    100     S_ENDPGM
    101 
    102 ...
    103 
    104 # CHECK-LABEL: name: extract_undef_neg_offset_vgpr{{$}}
    105 # CHECK: bb.1:
    106 # CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
    107 # CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
    108 
    109 # CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
    110 # CHECK: %m0 = S_MOV_B32 %vcc_lo
    111 # CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
    112 # CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
    113 # CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
    114 
    115 # CHECK: bb.2:
    116 # CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
    117 
    118 name:            extract_undef_neg_offset_vgpr # MIR input: SI_INDIRECT_SRC_V4 with an undef VGPR index and offset -7
    119 alignment:       0
    120 exposesReturnsTwice: false
    121 hasInlineAsm:    false
    122 allVRegsAllocated: true
    123 isSSA:           false
    124 tracksRegLiveness: true # the CHECK lines for this function match liveins lists on bb.1 and bb.2
    125 tracksSubRegLiveness: true
    126 liveins:
    127   - { reg: '%sgpr0_sgpr1' }
    128 frameInfo:
    129   isFrameAddressTaken: false
    130   isReturnAddressTaken: false
    131   hasStackMap:     false
    132   hasPatchPoint:   false
    133   stackSize:       0
    134   offsetAdjustment: 0
    135   maxAlignment:    0
    136   adjustsStack:    false
    137   hasCalls:        false
    138   maxCallFrameSize: 0
    139   hasOpaqueSPAdjustment: false
    140   hasVAStart:      false
    141   hasMustTailInVarArgFunc: false
    142 body:             |
    143   bb.0.entry:
    144     liveins: %sgpr0_sgpr1
    145 
    146     %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 ; low half of the %sgpr4..7 operand used by the buffer load
    147     %sgpr7 = S_MOV_B32 61440
    148     %sgpr6 = S_MOV_B32 -1
    149     S_WAITCNT 127
    150     %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec ; loads the 4-dword vector that is indexed below
    151     %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 ; %sgpr4_sgpr5 is overwritten; the new value is used by the store
    152     S_WAITCNT 3952
    153     %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec ; pseudo under test: the -7 immediate is expected to appear as S_ADD_I32 %m0, -7 in the CHECK lines
    154     S_WAITCNT 127
    155     BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec ; stores the selected element
    156     S_ENDPGM
    157 
    158 ...
    159 
    160 # CHECK-LABEL: name: insert_undef_offset_vgpr{{$}}
    161 # CHECK: bb.1:
    162 # CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
    163 # CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
    164 
    165 # CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
    166 # CHECK: %m0 = S_MOV_B32 %vcc_lo
    167 # CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
    168 # CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
    169 
    170 # CHECK: bb.2:
    171 # CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
    172 
    173 name:            insert_undef_offset_vgpr # MIR input: SI_INDIRECT_DST_V4 with an undef VGPR index and offset 0
    174 alignment:       0
    175 exposesReturnsTwice: false
    176 hasInlineAsm:    false
    177 allVRegsAllocated: true
    178 isSSA:           false
    179 tracksRegLiveness: true # the CHECK lines for this function match liveins lists on bb.1 and bb.2
    180 tracksSubRegLiveness: true
    181 liveins:
    182   - { reg: '%sgpr0_sgpr1' }
    183 frameInfo:
    184   isFrameAddressTaken: false
    185   isReturnAddressTaken: false
    186   hasStackMap:     false
    187   hasPatchPoint:   false
    188   stackSize:       0
    189   offsetAdjustment: 0
    190   maxAlignment:    0
    191   adjustsStack:    false
    192   hasCalls:        false
    193   maxCallFrameSize: 0
    194   hasOpaqueSPAdjustment: false
    195   hasVAStart:      false
    196   hasMustTailInVarArgFunc: false
    197 body:             |
    198   bb.0.entry:
    199     liveins: %sgpr0_sgpr1
    200 
    201     %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    202     %sgpr7 = S_MOV_B32 61440
    203     %sgpr6 = S_MOV_B32 -1
    204     %vgpr4 = V_MOV_B32_e32 5, implicit %exec ; the value to insert (constant 5, as in the IR function)
    205     S_WAITCNT 127
    206     %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
    207     %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    208     S_WAITCNT 3952
    209     %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec ; pseudo under test: undef %vgpr10 is the index (expected in V_READFIRSTLANE_B32), %vgpr4 the value (expected in V_MOVRELD_B32_e32)
    210     S_WAITCNT 127
    211     BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
    212     S_ENDPGM
    213 
    214 ...
    215 
    216 # CHECK-LABEL: name: insert_undef_neg_offset_vgpr{{$}}
    217 # CHECK: bb.1:
    218 # CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
    219 # CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
    220 
    221 # CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
    222 # CHECK: %m0 = S_MOV_B32 %vcc_lo
    223 # CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
    224 # CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
    225 # CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
    226 
    227 # CHECK: bb.2:
    228 # CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
    229 
    230 name:            insert_undef_neg_offset_vgpr # MIR input: SI_INDIRECT_DST_V4 with an undef VGPR index and offset -7
    231 alignment:       0
    232 exposesReturnsTwice: false
    233 hasInlineAsm:    false
    234 allVRegsAllocated: true
    235 isSSA:           false
    236 tracksRegLiveness: true # the CHECK lines for this function match liveins lists on bb.1 and bb.2
    237 tracksSubRegLiveness: true
    238 liveins:
    239   - { reg: '%sgpr0_sgpr1' }
    240 frameInfo:
    241   isFrameAddressTaken: false
    242   isReturnAddressTaken: false
    243   hasStackMap:     false
    244   hasPatchPoint:   false
    245   stackSize:       0
    246   offsetAdjustment: 0
    247   maxAlignment:    0
    248   adjustsStack:    false
    249   hasCalls:        false
    250   maxCallFrameSize: 0
    251   hasOpaqueSPAdjustment: false
    252   hasVAStart:      false
    253   hasMustTailInVarArgFunc: false
    254 body:             |
    255   bb.0.entry:
    256     liveins: %sgpr0_sgpr1
    257 
    258     %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    259     %sgpr7 = S_MOV_B32 61440
    260     %sgpr6 = S_MOV_B32 -1
    261     %vgpr4 = V_MOV_B32_e32 5, implicit %exec ; the value to insert (constant 5, as in the IR function)
    262     S_WAITCNT 127
    263     %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
    264     %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    265     S_WAITCNT 3952
    266     %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec ; pseudo under test: undef index %vgpr10; the -7 immediate is expected as S_ADD_I32 %m0, -7 in the CHECK lines
    267     S_WAITCNT 127
    268     BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
    269     S_ENDPGM
    270 
    271 ...
    272 
    273 # CHECK-LABEL: name: insert_undef_value_offset_vgpr{{$}}
    274 # CHECK: bb.1:
    275 # CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
    276 # CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
    277 
    278 # CHECK: %vcc_lo = V_READFIRSTLANE_B32 %vgpr4, implicit %exec
    279 # CHECK: %m0 = S_MOV_B32 %vcc_lo
    280 # CHECK: %vgpr0 = V_MOVRELD_B32_e32 undef %vgpr10, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
    281 # CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
    282 
    283 # CHECK: bb.2:
    284 # CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
    285 
    286 name:            insert_undef_value_offset_vgpr # MIR input: SI_INDIRECT_DST_V4 with a defined VGPR index and an undef inserted value
    287 alignment:       0
    288 exposesReturnsTwice: false
    289 hasInlineAsm:    false
    290 allVRegsAllocated: true
    291 isSSA:           false
    292 tracksRegLiveness: true # the CHECK lines for this function match liveins lists on bb.1 and bb.2
    293 tracksSubRegLiveness: true
    294 liveins:
    295   - { reg: '%sgpr0_sgpr1' }
    296 frameInfo:
    297   isFrameAddressTaken: false
    298   isReturnAddressTaken: false
    299   hasStackMap:     false
    300   hasPatchPoint:   false
    301   stackSize:       0
    302   offsetAdjustment: 0
    303   maxAlignment:    0
    304   adjustsStack:    false
    305   hasCalls:        false
    306   maxCallFrameSize: 0
    307   hasOpaqueSPAdjustment: false
    308   hasVAStart:      false
    309   hasMustTailInVarArgFunc: false
    310 body:             |
    311   bb.0.entry:
    312     liveins: %sgpr0_sgpr1
    313 
    314     %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    315     %sgpr7 = S_MOV_B32 61440
    316     %sgpr6 = S_MOV_B32 -1
    317     %vgpr4 = V_MOV_B32_e32 2, implicit %exec ; here %vgpr4 is the index: the MIR uses the constant 2 where the IR function uses %idx
    318     S_WAITCNT 127
    319     %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
    320     %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    321     S_WAITCNT 3952
    322     %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, killed %vgpr4, 0, undef %vgpr10, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec ; pseudo under test: the undef operand is the inserted value, expected as V_MOVRELD_B32_e32 undef %vgpr10 in the CHECK lines
    323     S_WAITCNT 127
    324     BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
    325     S_ENDPGM
    326 
    327 ...
    328