; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
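
; Note: on SI, s_bfe_i32/s_bfe_i64 take the bit field descriptor in the second
; source operand, with the field offset in bits [5:0] and the field width in
; bits [22:16]. So 0x10000 below means "width 1, offset 0", 0x80000 means
; "width 8, offset 0", and so on.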


; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
; SI: s_load_dword [[ARG:s[0-9]+]],
; SI: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
; SI: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: buffer_store_dword [[EXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR * [[ADDR]]
; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x10000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x80000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x100000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x200000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
; XSI: s_bfe_i32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
; XSI: s_ashr_i32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: buffer_store_dword
; XEG: BFE_INT
; XEG: ASHR
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }

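; The v_ variants below load their operands, so the sign_extend_inreg pattern
; is expected to select to the VALU form (v_bfe_i32 plus v_ashrrev_i32 for the
; high half) instead of the scalar s_bfe form used above.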
; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i8_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 8
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i16_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

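; For an i32 field no bit field extract is needed: the low dword already holds
; the sign-extended value, so only an arithmetic shift by 31 should be emitted
; to materialize the high dword.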
; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
; SI: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

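; shl 6 followed by ashr 7 is sign_extend_inreg of the 25-bit field starting
; at bit 1, which matches the s_bfe operand 0x190001 (width 0x19 = 25,
; offset 1) checked below.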
; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
; SI-NOT: s_lshl
; SI-NOT: s_ashr
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
; SI-NOT: s_lshl
; SI-NOT: s_ashr
; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI: s_endpgm

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out
  ret void
}


; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: buffer_store_dwordx4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

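; The two functions below carry no instruction checks beyond the label; they
; only verify that these i8 select/xor patterns compile without error.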
; FUNC-LABEL: {{^}}testcase:
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}testcase_3:
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
; SI: buffer_load_sbyte
; SI: v_max_i32
; SI-NOT: bfe
; SI: buffer_store_short
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  %tmp2.5 = icmp sgt i32 %tmp2, 0
  %tmp3 = select i1 %tmp2.5, i32 %tmp2, i32 0
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}

declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

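; llvm.AMDGPU.bfe.i32(src, offset, width) sign-extends the width-bit field of
; src starting at bit offset. A zero-width extract should fold away entirely,
; which is why the test below only checks that no bfe instruction appears.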
; FUNC-LABEL: {{^}}bfe_0_width:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_8_bfe_8:
; SI: v_bfe_i32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_8_bfe_16:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI: s_endpgm
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; This really should be folded into a single bfe.
; FUNC-LABEL: {{^}}bfe_16_bfe_8:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
; SI: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
; SI: buffer_load_sbyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; SI: .text
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
; SI-NOT: shr
; SI-NOT: shl
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
; SI: buffer_load_dword
; SI-NOT: shl
; SI-NOT: shr
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; SI: buffer_load_dword
; SI-NOT: v_lshl
; SI-NOT: v_ashr
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; SI: s_endpgm
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure we propagate the VALUness to users of a moved scalar BFE.

; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64_move_use:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; SI-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63

  %and = and i64 %ashr, %s.val
  store i64 %and, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64_move_use:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
; SI-DAG: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[SHR]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  %and = and i64 %ashr, %s.val
  store i64 %and, i64 addrspace(1)* %out.gep, align 8
  ret void
}