; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone

; FUNC-LABEL: @sext_in_reg_i1_i32
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: BUFFER_STORE_DWORD [[EXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}
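
; Illustrative sketch only (commented out like the broken v1i64 test
; below, so it is not exercised by the RUN lines): the same i1 sign
; extension written directly with the BFE intrinsic that is declared and
; tested further down in this file, using its (value, offset, width)
; operand order. The function name is hypothetical.
;
; define void @sext_in_reg_i1_i32_bfe_sketch(i32 addrspace(1)* %out, i32 %in) {
;   %sext = call i32 @llvm.AMDGPU.bfe.i32(i32 %in, i32 0, i32 1) nounwind readnone
;   store i32 %sext, i32 addrspace(1)* %out
;   ret void
; }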

; FUNC-LABEL: @sext_in_reg_i8_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i16_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i1_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_i8_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with [[]] variables inside
;; {{}} does not work, and the _LO/_HI order might differ.
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_i16_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with [[]] variables inside
;; {{}} does not work, and the _LO/_HI order might differ.
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_i32_to_i64
; SI: S_LOAD_DWORD
; SI: S_LOAD_DWORD
; SI: S_ADD_I32 [[ADD:s[0-9]+]],
; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NOT: BFE_INT
; EG: ADD_INT {{\*?}} [[RES_LO]]
; EG: ASHR [[RES_HI]]
; EG: ADD_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with [[]] variables inside
;; {{}} does not work, and the _LO/_HI order might differ.
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64
; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: BUFFER_STORE_DWORD
; XEG: BFE_INT
; XEG: ASHR
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }

; FUNC-LABEL: @sext_in_reg_i1_in_i32_other_amount
; SI-NOT: BFE
; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}
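
; Worked example of why no BFE must be formed above: with mismatched
; shift amounts the shl/ashr pair is not a sign_extend_inreg. For
; %c = 1, (1 << 6) >> 7 evaluates to 0, while a 1-bit BFE of 1 would
; sign-extend to -1.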

; FUNC-LABEL: @sext_in_reg_v2i1_in_v2i32_other_amount
; SI: S_LSHL_B32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
  ret void
}

; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @testcase
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @testcase_3
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FIXME: The BFE should really be eliminated. I think it should happen
; when computeKnownBitsForTargetNode is implemented for imax.

; FUNC-LABEL: @sext_in_reg_to_illegal_type
; SI: BUFFER_LOAD_SBYTE
; SI: V_MAX_I32
; SI: V_BFE_I32
; SI: BUFFER_STORE_SHORT
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}
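
; Worked example behind the FIXME above: %tmp3 = imax(sext i8, 0) always
; lies in [0, 127], so the trunc/sext round trip through i8 cannot change
; it (an input byte of -5 gives imax 0; 100 gives 100). Once known bits
; are computed for imax, the trailing V_BFE_I32 should fold away.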

declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

; FUNC-LABEL: @bfe_0_width
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
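
; A width of 0 selects no bits, so (assuming the usual hardware BFE
; semantics, where a zero-width field produces 0) the call above should
; fold to a constant, which is why no BFE instruction is expected.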

; FUNC-LABEL: @bfe_8_bfe_8
; SI: V_BFE_I32
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_8_bfe_16
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI: S_ENDPGM
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}
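
; Reasoning for the single 8-bit BFE checked above: the inner 8-bit
; extract already yields a value sign-extended from bit 7, so the wider
; 16-bit extract of that value is an identity and folds away.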

; This really should be folded into a single BFE.
; FUNC-LABEL: @bfe_16_bfe_8
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE
; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe
; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sextload_i8_to_i32_bfe
; SI: BUFFER_LOAD_SBYTE
; SI-NOT: BFE
; SI: S_ENDPGM
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sextload_i8_to_i32_bfe_0:
; SI-NOT: BFE
; SI: S_ENDPGM
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0:
; SI-NOT: SHR
; SI-NOT: SHL
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: S_ENDPGM
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1
; SI: BUFFER_LOAD_DWORD
; SI-NOT: SHL
; SI-NOT: SHR
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; SI: S_ENDPGM
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
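
; Reasoning sketch for the folded shifts above: the shl/ashr pair
; sign-extends from 2 bits, and bit 1 of that result is still bit 1 of
; the original value, so the single bfe(x, 1, 1) computes the same thing.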

; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; SI: S_ENDPGM
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
    525