; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone

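; "Sign extend in reg" is a shl followed by an ashr by the same amount, which
; sign-extends the low bits of a value in place. These tests check that the
; shift pair is selected as a single bitfield-extract style instruction
; rather than two shifts.
;
; In the s_bfe immediates checked below, bits [22:16] hold the field width
; and the low bits hold the field offset: 0x10000 is a 1-bit field at
; offset 0, 0x80000 an 8-bit field, 0x100000 a 16-bit field, and so on.
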
; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
; SI: s_load_dword [[ARG:s[0-9]+]],
; SI: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
; SI: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: buffer_store_dword [[EXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR * [[ADDR]]
; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}

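; The 64-bit cases use s_bfe_i64 with the same immediate layout as
; s_bfe_i32; the extracted field always starts at bit 0 here, so only the
; width (1, 8, 16, or 32 bits) varies between the tests below.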
; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x10000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x80000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: LSHL
; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with variables inside {{}}
;; does not work, and the _LO/_HI order might differ.
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x100000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: LSHL
; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with variables inside {{}}
;; does not work, and the _LO/_HI order might differ.
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x200000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NOT: BFE_INT

; EG: ASHR [[RES_HI]]

; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with variables inside {{}}
;; does not work, and the _LO/_HI order might differ.
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
; XSI: s_bfe_i32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
; XSI: s_ashr_i32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: buffer_store_dword
; XEG: BFE_INT
; XEG: ASHR
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }

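; VALU variants: the operands are loaded rather than passed as kernel
; arguments, so the extract should select to v_bfe_i32 (which takes the
; offset and width as separate operands) on the low half, with the high half
; produced by a 31-bit arithmetic shift of the low result.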
; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i8_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 8
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i16_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
; SI: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

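; A shl by 6 followed by an ashr by 7 extracts a signed 25-bit field at
; offset 1, so it should still become a single s_bfe_i32; 0x190001 packs
; width 25 (0x19) with offset 1.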
; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
; SI-NOT: s_lshl
; SI-NOT: s_ashr
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
; SI-NOT: s_lshl
; SI-NOT: s_ashr
; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI: s_endpgm

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: buffer_store_dwordx4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

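; The two tests below carry no SI/EG instruction checks; they mix i8
; compares, selects, and xor, and just need to make it through codegen.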
; FUNC-LABEL: {{^}}testcase:
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}testcase_3:
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
; SI: buffer_load_sbyte
; SI: v_max_i32
; SI-NOT: bfe
; SI: buffer_store_short
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}

declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

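; @llvm.AMDGPU.bfe.i32 takes (value, offset, width). The following tests
; check that degenerate and redundant bitfield extracts fold away.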
; FUNC-LABEL: {{^}}bfe_0_width:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_8_bfe_8:
; SI: v_bfe_i32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_8_bfe_16:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI: s_endpgm
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; This really should be folded into a single BFE.
; FUNC-LABEL: {{^}}bfe_16_bfe_8:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE.
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
; SI: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

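; The bfe below is given offset 8 and width 0, which does not line up with
; the following shl/ashr pair, so nothing should be folded together here.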
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
; SI: buffer_load_sbyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; SI: .text
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

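; The shl/ashr pair already produces a sign-extended 1-bit value, so the
; explicit bfe of it should combine into a single v_bfe_i32.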
; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
; SI-NOT: shr
; SI-NOT: shl
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
; SI: buffer_load_dword
; SI-NOT: shl
; SI-NOT: shr
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; SI: buffer_load_dword
; SI-NOT: v_lshl
; SI-NOT: v_ashr
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; SI: s_endpgm
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure we propagate the VALUness to users of a moved scalar BFE.

; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64_move_use:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; SI-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63

  %and = and i64 %ashr, %s.val
  store i64 %and, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64_move_use:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
; SI-DAG: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[SHR]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  %and = and i64 %ashr, %s.val
  store i64 %and, i64 addrspace(1)* %out.gep, align 8
  ret void
}