Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
      2 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
      3 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
      4 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
      5 
      6 ; This particular case will actually be worse in terms of code size
      7 ; after sinking the shift into both successor blocks.
      8 
      9 ; OPT-LABEL: @sink_ubfe_i32(
     10 ; OPT: entry:
     11 ; OPT-NEXT: br i1
     12 
     13 ; OPT: bb0:
     14 ; OPT: %0 = lshr i32 %arg1, 8
     15 ; OPT-NEXT: %val0 = and i32 %0, 255
     16 ; OPT: br label
     17 
     18 ; OPT: bb1:
     19 ; OPT: %1 = lshr i32 %arg1, 8
     20 ; OPT-NEXT: %val1 = and i32 %1, 127
     21 ; OPT: br label
     22 
     23 ; OPT: ret:
     24 ; OPT: store
     25 ; OPT: ret
     26 
     27 
     28 ; GCN-LABEL: {{^}}sink_ubfe_i32:
     29 ; GCN-NOT: lshr
     30 ; GCN: s_cbranch_scc1
     31 
     32 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
     33 ; GCN: BB0_2:
     34 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008
     35 
     36 ; GCN: BB0_3:
     37 ; GCN: buffer_store_dword
     38 ; GCN: s_endpgm
     39 define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 { ; i32 case: logical shift right by 8 in entry, masks in the two successors
     40 entry:
     41   %shr = lshr i32 %arg1, 8 ; defined here, used only by the masks in bb0 and bb1
     42   br i1 undef, label %bb0, label %bb1 ; condition is undef; the checks match the branch, not its condition
     43 
     44 bb0:
     45   %val0 = and i32 %shr, 255 ; selects bits 8-15 of %arg1 (8-bit field)
     46   store volatile i32 0, i32 addrspace(1)* undef ; NOTE(review): presumably here so the block holds more than the mask -- confirm
     47   br label %ret
     48 
     49 bb1:
     50   %val1 = and i32 %shr, 127 ; selects bits 8-14 of %arg1 (7-bit field)
     51   store volatile i32 0, i32 addrspace(1)* undef ; same volatile store as in bb0
     52   br label %ret
     53 
     54 ret:
     55   %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ] ; masked value from whichever predecessor ran
     56   store i32 %phi, i32 addrspace(1)* %out ; the only use of the extracted field
     57   ret void
     58 }
     59 
     60 ; OPT-LABEL: @sink_sbfe_i32(
     61 ; OPT: entry:
     62 ; OPT-NEXT: br i1
     63 
     64 ; OPT: bb0:
     65 ; OPT: %0 = ashr i32 %arg1, 8
     66 ; OPT-NEXT: %val0 = and i32 %0, 255
     67 ; OPT: br label
     68 
     69 ; OPT: bb1:
     70 ; OPT: %1 = ashr i32 %arg1, 8
     71 ; OPT-NEXT: %val1 = and i32 %1, 127
     72 ; OPT: br label
     73 
     74 ; OPT: ret:
     75 ; OPT: store
     76 ; OPT: ret
     77 
     78 ; GCN-LABEL: {{^}}sink_sbfe_i32:
     79 define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 { ; same shape as the lshr i32 case, but with an arithmetic shift; NOTE(review): no llc instruction checks follow this function's label
     80 entry:
     81   %shr = ashr i32 %arg1, 8 ; arithmetic shift defined in entry, used only by the masks in bb0 and bb1
     82   br i1 undef, label %bb0, label %bb1 ; condition is undef; the checks match the branch, not its condition
     83 
     84 bb0:
     85   %val0 = and i32 %shr, 255 ; mask keeps bits 8-15 of %arg1; the sign-filled high bits are discarded
     86   store volatile i32 0, i32 addrspace(1)* undef ; NOTE(review): presumably here so the block holds more than the mask -- confirm
     87   br label %ret
     88 
     89 bb1:
     90   %val1 = and i32 %shr, 127 ; mask keeps bits 8-14 of %arg1
     91   store volatile i32 0, i32 addrspace(1)* undef ; same volatile store as in bb0
     92   br label %ret
     93 
     94 ret:
     95   %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ] ; masked value from whichever predecessor ran
     96   store i32 %phi, i32 addrspace(1)* %out ; the only use of the extracted field
     97   ret void
     98 }
     99 
    100 
    101 ; OPT-LABEL: @sink_ubfe_i16(
    102 ; OPT: entry:
    103 ; OPT-NEXT: br i1
    104 
    105 ; OPT: bb0:
    106 ; OPT: %0 = lshr i16 %arg1, 4
    107 ; OPT-NEXT: %val0 = and i16 %0, 255
    108 ; OPT: br label
    109 
    110 ; OPT: bb1:
    111 ; OPT: %1 = lshr i16 %arg1, 4
    112 ; OPT-NEXT: %val1 = and i16 %1, 127
    113 ; OPT: br label
    114 
    115 ; OPT: ret:
    116 ; OPT: store
    117 ; OPT: ret
    118 
    119 ; For GFX8: since i16 is a legal type, the lshr cannot be sunk into the basic blocks.
    120 
    121 ; GCN-LABEL: {{^}}sink_ubfe_i16:
    122 ; GCN-NOT: lshr
    123 ; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
    124 ; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
    125 ; GCN: s_cbranch_scc1
    126 
    127 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
    128 ; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff
    129 
    130 ; GCN: BB2_2:
    131 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
    132 ; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f
    133 
    134 ; GCN: BB2_3:
    135 ; GCN: buffer_store_short
    136 ; GCN: s_endpgm
    137 define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 { ; i16 case: logical shift right by 4 in entry, masks in the two successors
    138 entry:
    139   %shr = lshr i16 %arg1, 4 ; defined here, used only by the masks in bb0 and bb1
    140   br i1 undef, label %bb0, label %bb1 ; condition is undef; the checks match the branch, not its condition
    141 
    142 bb0:
    143   %val0 = and i16 %shr, 255 ; selects bits 4-11 of %arg1 (8-bit field)
    144   store volatile i16 0, i16 addrspace(1)* undef ; NOTE(review): presumably here so the block holds more than the mask -- confirm
    145   br label %ret
    146 
    147 bb1:
    148   %val1 = and i16 %shr, 127 ; selects bits 4-10 of %arg1 (7-bit field)
    149   store volatile i16 0, i16 addrspace(1)* undef ; same volatile store as in bb0
    150   br label %ret
    151 
    152 ret:
    153   %phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ] ; masked value from whichever predecessor ran
    154   store i16 %phi, i16 addrspace(1)* %out ; the only use of the extracted field
    155   ret void
    156 }
    157 
    158 ; We don't really want to sink this one since it isn't reducible to a
    159 ; 32-bit BFE on one half of the integer.
    160 
    161 ; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
    162 ; OPT: entry:
    163 ; OPT-NOT: lshr
    164 ; OPT: br i1
    165 
    166 ; OPT: bb0:
    167 ; OPT: %0 = lshr i64 %arg1, 30
    168 ; OPT-NEXT: %val0 = and i64 %0, 255
    169 
    170 ; OPT: bb1:
    171 ; OPT: %1 = lshr i64 %arg1, 30
    172 ; OPT-NEXT: %val1 = and i64 %1, 127
    173 
    174 ; OPT: ret:
    175 ; OPT: store
    176 ; OPT: ret
    177 
    178 ; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
    179 
    180 ; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
    181 ; GCN: s_cbranch_scc1 BB3_2
    182 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
    183 
    184 ; GCN: BB3_2:
    185 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]
    186 
    187 ; GCN: BB3_3:
    188 ; GCN: buffer_store_dwordx2
    189 define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 { ; i64 case where the extracted field crosses the boundary between the low and high 32 bits
    190 entry:
    191   %shr = lshr i64 %arg1, 30 ; defined here, used only by the masks in bb0 and bb1
    192   br i1 undef, label %bb0, label %bb1 ; condition is undef; the checks match the branch, not its condition
    193 
    194 bb0:
    195   %val0 = and i64 %shr, 255 ; selects bits 30-37 of %arg1, i.e. two bits from the low half and six from the high half
    196   store volatile i32 0, i32 addrspace(1)* undef ; volatile store is i32 even though the extracted value is i64
    197   br label %ret
    198 
    199 bb1:
    200   %val1 = and i64 %shr, 127 ; selects bits 30-36 of %arg1, also crossing bit 32
    201   store volatile i32 0, i32 addrspace(1)* undef ; same volatile store as in bb0
    202   br label %ret
    203 
    204 ret:
    205   %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ] ; masked value from whichever predecessor ran
    206   store i64 %phi, i64 addrspace(1)* %out ; the only use of the extracted field
    207   ret void
    208 }
    209 
    210 ; OPT-LABEL: @sink_ubfe_i64_low32(
    211 ; OPT: entry:
    212 ; OPT-NOT: lshr
    213 ; OPT: br i1
    214 
    215 ; OPT: bb0:
    216 ; OPT: %0 = lshr i64 %arg1, 15
    217 ; OPT-NEXT: %val0 = and i64 %0, 255
    218 
    219 ; OPT: bb1:
    220 ; OPT: %1 = lshr i64 %arg1, 15
    221 ; OPT-NEXT: %val1 = and i64 %1, 127
    222 
    223 ; OPT: ret:
    224 ; OPT: store
    225 ; OPT: ret
    226 
    227 ; GCN-LABEL: {{^}}sink_ubfe_i64_low32:
    228 
    229 ; GCN: s_cbranch_scc1 BB4_2
    230 
    231 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
    232 
    233 ; GCN: BB4_2:
    234 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f
    235 
    236 ; GCN: BB4_3:
    237 ; GCN: buffer_store_dwordx2
    238 define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 { ; i64 case where the extracted field lies entirely in the low 32 bits
    239 entry:
    240   %shr = lshr i64 %arg1, 15 ; defined here, used only by the masks in bb0 and bb1
    241   br i1 undef, label %bb0, label %bb1 ; condition is undef; the checks match the branch, not its condition
    242 
    243 bb0:
    244   %val0 = and i64 %shr, 255 ; selects bits 15-22 of %arg1 (8-bit field)
    245   store volatile i32 0, i32 addrspace(1)* undef ; volatile store is i32 even though the extracted value is i64
    246   br label %ret
    247 
    248 bb1:
    249   %val1 = and i64 %shr, 127 ; selects bits 15-21 of %arg1 (7-bit field)
    250   store volatile i32 0, i32 addrspace(1)* undef ; same volatile store as in bb0
    251   br label %ret
    252 
    253 ret:
    254   %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ] ; masked value from whichever predecessor ran
    255   store i64 %phi, i64 addrspace(1)* %out ; the only use of the extracted field
    256   ret void
    257 }
    258 
    259 ; OPT-LABEL: @sink_ubfe_i64_high32(
    260 ; OPT: entry:
    261 ; OPT-NOT: lshr
    262 ; OPT: br i1
    263 
    264 ; OPT: bb0:
    265 ; OPT: %0 = lshr i64 %arg1, 35
    266 ; OPT-NEXT: %val0 = and i64 %0, 255
    267 
    268 ; OPT: bb1:
    269 ; OPT: %1 = lshr i64 %arg1, 35
    270 ; OPT-NEXT: %val1 = and i64 %1, 127
    271 
    272 ; OPT: ret:
    273 ; OPT: store
    274 ; OPT: ret
    275 
    276 ; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
    277 ; GCN: s_cbranch_scc1 BB5_2
    278 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
    279 
    280 ; GCN: BB5_2:
    281 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003
    282 
    283 ; GCN: BB5_3:
    284 ; GCN: buffer_store_dwordx2
    285 define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 { ; i64 case where the extracted field lies entirely in the high 32 bits
    286 entry:
    287   %shr = lshr i64 %arg1, 35 ; defined here, used only by the masks in bb0 and bb1
    288   br i1 undef, label %bb0, label %bb1 ; condition is undef; the checks match the branch, not its condition
    289 
    290 bb0:
    291   %val0 = and i64 %shr, 255 ; selects bits 35-42 of %arg1, i.e. bits 3-10 of the high half
    292   store volatile i32 0, i32 addrspace(1)* undef ; volatile store is i32 even though the extracted value is i64
    293   br label %ret
    294 
    295 bb1:
    296   %val1 = and i64 %shr, 127 ; selects bits 35-41 of %arg1, i.e. bits 3-9 of the high half
    297   store volatile i32 0, i32 addrspace(1)* undef ; same volatile store as in bb0
    298   br label %ret
    299 
    300 ret:
    301   %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ] ; masked value from whichever predecessor ran
    302   store i64 %phi, i64 addrspace(1)* %out ; the only use of the extracted field
    303   ret void
    304 }
    305 
    306 attributes #0 = { nounwind }
    307