Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
      2 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
      3 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
      4 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
      5 
      6 ;===------------------------------------------------------------------------===;
      7 ; GLOBAL ADDRESS SPACE
      8 ;===------------------------------------------------------------------------===;
      9 
     10 ; Zero-extending i8 load from the global address space (addrspace(1)).
        ; The loaded byte is widened to i32 and written to %out.
     11 ; FUNC-LABEL: {{^}}load_i8:
     12 ; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
     13 
     14 ; SI: buffer_load_ubyte v{{[0-9]+}},
     15 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
     16   %val = load i8, i8 addrspace(1)* %in
     17   %ext = zext i8 %val to i32
     18   store i32 %ext, i32 addrspace(1)* %out
     19   ret void
     20 }
     21 
     22 ; FUNC-LABEL: {{^}}load_i8_sext:
     23 ; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
     24 ; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
     25 ; R600: 8
     26 ; SI: buffer_load_sbyte
        ; Sign-extending i8 load from addrspace(1); the result is stored as i32.
        ; The BFE_INT destination pattern escapes the '.' so it only matches a
        ; literal dot, the same way the VTX_READ_8 pattern in this block does.
     27 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
     28 entry:
     29   %val = load i8, i8 addrspace(1)* %in
     30   %ext = sext i8 %val to i32
     31   store i32 %ext, i32 addrspace(1)* %out
     32   ret void
     33 }
     34 
     35 ; FUNC-LABEL: {{^}}load_v2i8:
     36 ; R600: VTX_READ_8
     37 ; R600: VTX_READ_8
     38 ; SI: buffer_load_ubyte
     39 ; SI: buffer_load_ubyte
        ; Zero-extending <2 x i8> load from addrspace(1), stored as <2 x i32>.
        ; The checks expect one byte-sized load per vector element.
     40 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
     41 entry:
     42   %val = load <2 x i8>, <2 x i8> addrspace(1)* %in
     43   %ext = zext <2 x i8> %val to <2 x i32>
     44   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
     45   ret void
     46 }
     47 
     48 ; FUNC-LABEL: {{^}}load_v2i8_sext:
     49 ; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
     50 ; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
     51 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
     52 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
     53 ; R600-DAG: 8
     54 ; R600-DAG: 8
     55 
     56 ; SI: buffer_load_sbyte
     57 ; SI: buffer_load_sbyte
        ; Sign-extending <2 x i8> load from addrspace(1), stored as <2 x i32>.
        ; Each BFE_INT check is tied to the register captured by one of the
        ; VTX_READ_8 checks; the '.' in the BFE_INT destination pattern is
        ; escaped so it only matches a literal dot.
     58 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
     59 entry:
     60   %val = load <2 x i8>, <2 x i8> addrspace(1)* %in
     61   %ext = sext <2 x i8> %val to <2 x i32>
     62   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
     63   ret void
     64 }
     65 
     66 ; FUNC-LABEL: {{^}}load_v4i8:
     67 ; R600: VTX_READ_8
     68 ; R600: VTX_READ_8
     69 ; R600: VTX_READ_8
     70 ; R600: VTX_READ_8
     71 ; SI: buffer_load_ubyte
     72 ; SI: buffer_load_ubyte
     73 ; SI: buffer_load_ubyte
     74 ; SI: buffer_load_ubyte
        ; Zero-extending <4 x i8> load from addrspace(1), stored as <4 x i32>.
        ; The checks expect one byte-sized load per vector element.
     75 define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
     76 entry:
     77   %val = load <4 x i8>, <4 x i8> addrspace(1)* %in
     78   %ext = zext <4 x i8> %val to <4 x i32>
     79   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
     80   ret void
     81 }
     82 
     83 ; FUNC-LABEL: {{^}}load_v4i8_sext:
     84 ; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
     85 ; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
     86 ; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
     87 ; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
     88 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
     89 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
     90 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Z]], 0.0, literal
     91 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_W]], 0.0, literal
     92 ; R600-DAG: 8
     93 ; R600-DAG: 8
     94 ; R600-DAG: 8
     95 ; R600-DAG: 8
     96 ; SI: buffer_load_sbyte
     97 ; SI: buffer_load_sbyte
     98 ; SI: buffer_load_sbyte
     99 ; SI: buffer_load_sbyte
        ; Sign-extending <4 x i8> load from addrspace(1), stored as <4 x i32>.
        ; Each BFE_INT check is tied to the register captured by one of the
        ; VTX_READ_8 checks; the '.' in the BFE_INT destination pattern is
        ; escaped so it only matches a literal dot.
    100 define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
    101 entry:
    102   %val = load <4 x i8>, <4 x i8> addrspace(1)* %in
    103   %ext = sext <4 x i8> %val to <4 x i32>
    104   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
    105   ret void
    106 }
    107 
    108 ; Zero-extending i16 load from the global address space (addrspace(1)).
        ; The loaded halfword is widened to i32 and written to %out.
    109 ; FUNC-LABEL: {{^}}load_i16:
    110 ; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
    111 ; SI: buffer_load_ushort
    112 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
    113 entry:
    114   %val = load i16, i16 addrspace(1)* %in
    115   %ext = zext i16 %val to i32
    116   store i32 %ext, i32 addrspace(1)* %out
    117   ret void
    118 }
    119 
    120 ; FUNC-LABEL: {{^}}load_i16_sext:
    121 ; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
    122 ; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
    123 ; R600: 16
    124 ; SI: buffer_load_sshort
        ; Sign-extending i16 load from addrspace(1); the result is stored as i32.
        ; The BFE_INT destination pattern escapes the '.' so it only matches a
        ; literal dot, the same way the VTX_READ_16 pattern in this block does.
    125 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
    126 entry:
    127   %val = load i16, i16 addrspace(1)* %in
    128   %ext = sext i16 %val to i32
    129   store i32 %ext, i32 addrspace(1)* %out
    130   ret void
    131 }
    132 
    133 ; FUNC-LABEL: {{^}}load_v2i16:
    134 ; R600: VTX_READ_16
    135 ; R600: VTX_READ_16
    136 ; SI: buffer_load_ushort
    137 ; SI: buffer_load_ushort
        ; Zero-extending <2 x i16> load from addrspace(1), stored as <2 x i32>.
        ; The checks expect one 16-bit load per vector element.
    138 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
    139 entry:
    140   %val = load <2 x i16>, <2 x i16> addrspace(1)* %in
    141   %ext = zext <2 x i16> %val to <2 x i32>
    142   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
    143   ret void
    144 }
    145 
    146 ; FUNC-LABEL: {{^}}load_v2i16_sext:
    147 ; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
    148 ; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
    149 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
    150 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
    151 ; R600-DAG: 16
    152 ; R600-DAG: 16
    153 ; SI: buffer_load_sshort
    154 ; SI: buffer_load_sshort
        ; Sign-extending <2 x i16> load from addrspace(1), stored as <2 x i32>.
        ; Each BFE_INT check is tied to the register captured by one of the
        ; VTX_READ_16 checks; the '.' in the BFE_INT destination pattern is
        ; escaped so it only matches a literal dot.
    155 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
    156 entry:
    157   %val = load <2 x i16>, <2 x i16> addrspace(1)* %in
    158   %ext = sext <2 x i16> %val to <2 x i32>
    159   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
    160   ret void
    161 }
    162 
    163 ; FUNC-LABEL: {{^}}load_v4i16:
    164 ; R600: VTX_READ_16
    165 ; R600: VTX_READ_16
    166 ; R600: VTX_READ_16
    167 ; R600: VTX_READ_16
    168 ; SI: buffer_load_ushort
    169 ; SI: buffer_load_ushort
    170 ; SI: buffer_load_ushort
    171 ; SI: buffer_load_ushort
        ; Zero-extending <4 x i16> load from addrspace(1), stored as <4 x i32>.
        ; The checks expect one 16-bit load per vector element.
    172 define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
    173 entry:
    174   %val = load <4 x i16>, <4 x i16> addrspace(1)* %in
    175   %ext = zext <4 x i16> %val to <4 x i32>
    176   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
    177   ret void
    178 }
    179 
    180 ; FUNC-LABEL: {{^}}load_v4i16_sext:
    181 ; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
    182 ; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
    183 ; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
    184 ; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
    185 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
    186 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
    187 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Z]], 0.0, literal
    188 ; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_W]], 0.0, literal
    189 ; R600-DAG: 16
    190 ; R600-DAG: 16
    191 ; R600-DAG: 16
    192 ; R600-DAG: 16
    193 ; SI: buffer_load_sshort
    194 ; SI: buffer_load_sshort
    195 ; SI: buffer_load_sshort
    196 ; SI: buffer_load_sshort
        ; Sign-extending <4 x i16> load from addrspace(1), stored as <4 x i32>.
        ; Each BFE_INT check is tied to the register captured by one of the
        ; VTX_READ_16 checks; the '.' in the BFE_INT destination pattern is
        ; escaped so it only matches a literal dot.
    197 define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
    198 entry:
    199   %val = load <4 x i16>, <4 x i16> addrspace(1)* %in
    200   %ext = sext <4 x i16> %val to <4 x i32>
    201   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
    202   ret void
    203 }
    204 
    205 ; Load an i32 value from the global address space (addrspace(1)) and
        ; store it unchanged to %out.
    206 ; FUNC-LABEL: {{^}}load_i32:
    207 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    208 
    209 ; SI: buffer_load_dword v{{[0-9]+}}
    210 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    211 entry:
    212   %val = load i32, i32 addrspace(1)* %in
    213   store i32 %val, i32 addrspace(1)* %out
    214   ret void
    215 }
    216 
    217 ; Load a float value from the global address space (addrspace(1)) and
        ; store it unchanged to %out.
    218 ; FUNC-LABEL: {{^}}load_f32:
    219 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    220 
    221 ; SI: buffer_load_dword v{{[0-9]+}}
    222 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
    223 entry:
    224   %val = load float, float addrspace(1)* %in
    225   store float %val, float addrspace(1)* %out
    226   ret void
    227 }
    228 
    229 ; Load a <2 x float> value from the global address space (addrspace(1))
        ; and store it unchanged; the checks expect a single 64-bit load.
    230 ; FUNC-LABEL: {{^}}load_v2f32:
    231 ; R600: MEM_RAT
    232 ; R600: VTX_READ_64
    233 ; SI: buffer_load_dwordx2
    234 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
    235 entry:
    236   %val = load <2 x float>, <2 x float> addrspace(1)* %in
    237   store <2 x float> %val, <2 x float> addrspace(1)* %out
    238   ret void
    239 }
    240 
    241 ; FUNC-LABEL: {{^}}load_i64:
    242 ; R600: VTX_READ_64
    243 ; SI: buffer_load_dwordx2
        ; Load an i64 value from addrspace(1) and store it unchanged to %out.
    244 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
    245 entry:
    246   %val = load i64, i64 addrspace(1)* %in
    247   store i64 %val, i64 addrspace(1)* %out
    248   ret void
    249 }
    250 
    251 ; FUNC-LABEL: {{^}}load_i64_sext:
    252 ; R600: MEM_RAT
    253 ; R600: MEM_RAT
    254 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
    255 ; R600: 31
    256 ; SI: buffer_load_dword
    257 
        ; Load an i32 from addrspace(1), sign-extend it to i64 and store the
        ; result. The R600 checks expect an ASHR with a literal 31.
    258 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
    259 entry:
    260   %val = load i32, i32 addrspace(1)* %in
    261   %ext = sext i32 %val to i64
    262   store i64 %ext, i64 addrspace(1)* %out
    263   ret void
    264 }
    265 
    266 ; FUNC-LABEL: {{^}}load_i64_zext:
    267 ; R600: MEM_RAT
    268 ; R600: MEM_RAT
        ; SI: buffer_load_dword
        ; Load an i32 from addrspace(1), zero-extend it to i64 and store the
        ; result. The amdgcn runs are checked for the same dword load that the
        ; sign-extending variant of this test expects.
    269 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
    270 entry:
    271   %val = load i32, i32 addrspace(1)* %in
    272   %ext = zext i32 %val to i64
    273   store i64 %ext, i64 addrspace(1)* %out
    274   ret void
    275 }
    276 
    277 ; FUNC-LABEL: {{^}}load_v8i32:
    278 ; R600: VTX_READ_128
    279 ; R600: VTX_READ_128
    280 
    281 ; SI: buffer_load_dwordx4
    282 ; SI: buffer_load_dwordx4
        ; Load an <8 x i32> value from addrspace(1) and store it unchanged.
        ; The checks expect the access to be split into two 128-bit loads.
    283 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
    284 entry:
    285   %val = load <8 x i32>, <8 x i32> addrspace(1)* %in
    286   store <8 x i32> %val, <8 x i32> addrspace(1)* %out
    287   ret void
    288 }
    289 
    290 ; FUNC-LABEL: {{^}}load_v16i32:
    291 ; R600: VTX_READ_128
    292 ; R600: VTX_READ_128
    293 ; R600: VTX_READ_128
    294 ; R600: VTX_READ_128
    295 
    296 ; SI: buffer_load_dwordx4
    297 ; SI: buffer_load_dwordx4
    298 ; SI: buffer_load_dwordx4
    299 ; SI: buffer_load_dwordx4
        ; Load a <16 x i32> value from addrspace(1) and store it unchanged.
        ; The checks expect the access to be split into four 128-bit loads.
    300 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
    301 entry:
    302   %val = load <16 x i32>, <16 x i32> addrspace(1)* %in
    303   store <16 x i32> %val, <16 x i32> addrspace(1)* %out
    304   ret void
    305 }
    306 
    307 ;===------------------------------------------------------------------------===;
    308 ; CONSTANT ADDRESS SPACE
    309 ;===------------------------------------------------------------------------===;
    310 
    311 ; Load a sign-extended i8 value from the constant address space
        ; (addrspace(2)) and store it as i32 to %out.
        ; The BFE_INT destination pattern escapes the '.' so it only matches a
        ; literal dot, the same way the VTX_READ_8 pattern in this block does.
    312 ; FUNC-LABEL: {{^}}load_const_i8_sext:
    313 ; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
    314 ; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
    315 ; R600: 8
    316 ; SI: buffer_load_sbyte v{{[0-9]+}},
    317 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
    318 entry:
    319   %val = load i8, i8 addrspace(2)* %in
    320   %ext = sext i8 %val to i32
    321   store i32 %ext, i32 addrspace(1)* %out
    322   ret void
    323 }
    324 
    325 ; Load an aligned i8 value: zero-extending i8 load straight from %in in
        ; the constant address space (addrspace(2)), stored as i32.
    326 ; FUNC-LABEL: {{^}}load_const_i8_aligned:
    327 ; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
    328 ; SI: buffer_load_ubyte v{{[0-9]+}},
    329 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
    330 entry:
    331   %val = load i8, i8 addrspace(2)* %in
    332   %ext = zext i8 %val to i32
    333   store i32 %ext, i32 addrspace(1)* %out
    334   ret void
    335 }
    336 
    337 ; Load an un-aligned i8 value: the address is %in advanced by one i8
        ; element in the constant address space (addrspace(2)); the byte is
        ; zero-extended and stored as i32.
    338 ; FUNC-LABEL: {{^}}load_const_i8_unaligned:
    339 ; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
    340 ; SI: buffer_load_ubyte v{{[0-9]+}},
    341 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
    342 entry:
    343   %addr = getelementptr i8, i8 addrspace(2)* %in, i32 1
    344   %val = load i8, i8 addrspace(2)* %addr
    345   %ext = zext i8 %val to i32
    346   store i32 %ext, i32 addrspace(1)* %out
    347   ret void
    348 }
    349 
    350 ; Load a sign-extended i16 value from the constant address space
        ; (addrspace(2)) and store it as i32 to %out.
        ; The BFE_INT destination pattern escapes the '.' so it only matches a
        ; literal dot, the same way the VTX_READ_16 pattern in this block does.
    351 ; FUNC-LABEL: {{^}}load_const_i16_sext:
    352 ; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
    353 ; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
    354 ; R600: 16
    355 ; SI: buffer_load_sshort
    356 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
    357 entry:
    358   %val = load i16, i16 addrspace(2)* %in
    359   %ext = sext i16 %val to i32
    360   store i32 %ext, i32 addrspace(1)* %out
    361   ret void
    362 }
    363 
    364 ; Load an aligned i16 value: zero-extending i16 load straight from %in
        ; in the constant address space (addrspace(2)), stored as i32.
    365 ; FUNC-LABEL: {{^}}load_const_i16_aligned:
    366 ; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
    367 ; SI: buffer_load_ushort
    368 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
    369 entry:
    370   %val = load i16, i16 addrspace(2)* %in
    371   %ext = zext i16 %val to i32
    372   store i32 %ext, i32 addrspace(1)* %out
    373   ret void
    374 }
    375 
    376 ; Load an un-aligned i16 value: the address is %in advanced by one i16
        ; element in the constant address space (addrspace(2)); the halfword is
        ; zero-extended and stored as i32.
    377 ; FUNC-LABEL: {{^}}load_const_i16_unaligned:
    378 ; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
    379 ; SI: buffer_load_ushort
    380 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
    381 entry:
    382   %addr = getelementptr i16, i16 addrspace(2)* %in, i32 1
    383   %val = load i16, i16 addrspace(2)* %addr
    384   %ext = zext i16 %val to i32
    385   store i32 %ext, i32 addrspace(1)* %out
    386   ret void
    387 }
    388 
    389 ; Load an i32 value from the constant address space (addrspace(2)) and
        ; store it to %out. The amdgcn checks expect a scalar s_load_dword.
    390 ; FUNC-LABEL: {{^}}load_const_addrspace_i32:
    391 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    392 
    393 ; SI: s_load_dword s{{[0-9]+}}
    394 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
    395 entry:
    396   %val = load i32, i32 addrspace(2)* %in
    397   store i32 %val, i32 addrspace(1)* %out
    398   ret void
    399 }
    400 
    401 ; Load a float value from the constant address space (addrspace(2)) and
        ; store it to %out. The amdgcn checks expect a scalar s_load_dword.
    402 ; FUNC-LABEL: {{^}}load_const_addrspace_f32:
    403 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    404 
    405 ; SI: s_load_dword s{{[0-9]+}}
    406 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
    407   %val = load float, float addrspace(2)* %in
    408   store float %val, float addrspace(1)* %out
    409   ret void
    410 }
    411 
    412 ;===------------------------------------------------------------------------===;
    413 ; LOCAL ADDRESS SPACE
    414 ;===------------------------------------------------------------------------===;
    415 
    416 ; Zero-extending i8 load from the local address space (addrspace(3)).
        ; The loaded byte is widened to i32 and written to %out in addrspace(1).
    417 ; FUNC-LABEL: {{^}}load_i8_local:
    418 ; R600: LDS_UBYTE_READ_RET
    419 ; SI-NOT: s_wqm_b64
    420 ; SI: s_mov_b32 m0
    421 ; SI: ds_read_u8
    422 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
    423   %val = load i8, i8 addrspace(3)* %in
    424   %ext = zext i8 %val to i32
    425   store i32 %ext, i32 addrspace(1)* %out
    426   ret void
    427 }
    428 
    429 ; FUNC-LABEL: {{^}}load_i8_sext_local:
    430 ; R600: LDS_UBYTE_READ_RET
    431 ; R600: BFE_INT
    432 ; SI-NOT: s_wqm_b64
    433 ; SI: s_mov_b32 m0
    434 ; SI: ds_read_i8
        ; Sign-extending i8 load from addrspace(3); the result is stored as i32
        ; to %out in addrspace(1).
    435 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
    436 entry:
    437   %val = load i8, i8 addrspace(3)* %in
    438   %ext = sext i8 %val to i32
    439   store i32 %ext, i32 addrspace(1)* %out
    440   ret void
    441 }
    442 
    443 ; FUNC-LABEL: {{^}}load_v2i8_local:
    444 ; R600: LDS_UBYTE_READ_RET
    445 ; R600: LDS_UBYTE_READ_RET
    446 ; SI-NOT: s_wqm_b64
    447 ; SI: s_mov_b32 m0
    448 ; SI: ds_read_u8
    449 ; SI: ds_read_u8
        ; Zero-extending <2 x i8> load from addrspace(3), stored as <2 x i32>.
        ; The checks expect one byte-sized read per vector element.
    450 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
    451 entry:
    452   %val = load <2 x i8>, <2 x i8> addrspace(3)* %in
    453   %ext = zext <2 x i8> %val to <2 x i32>
    454   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
    455   ret void
    456 }
    457 
    458 ; FUNC-LABEL: {{^}}load_v2i8_sext_local:
    459 ; R600-DAG: LDS_UBYTE_READ_RET
    460 ; R600-DAG: LDS_UBYTE_READ_RET
    461 ; R600-DAG: BFE_INT
    462 ; R600-DAG: BFE_INT
    463 ; SI-NOT: s_wqm_b64
    464 ; SI: s_mov_b32 m0
    465 ; SI: ds_read_i8
    466 ; SI: ds_read_i8
        ; Sign-extending <2 x i8> load from addrspace(3), stored as <2 x i32>.
        ; The checks expect one byte-sized read per vector element.
    467 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
    468 entry:
    469   %val = load <2 x i8>, <2 x i8> addrspace(3)* %in
    470   %ext = sext <2 x i8> %val to <2 x i32>
    471   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
    472   ret void
    473 }
    474 
    475 ; FUNC-LABEL: {{^}}load_v4i8_local:
    476 ; R600: LDS_UBYTE_READ_RET
    477 ; R600: LDS_UBYTE_READ_RET
    478 ; R600: LDS_UBYTE_READ_RET
    479 ; R600: LDS_UBYTE_READ_RET
    480 ; SI-NOT: s_wqm_b64
    481 ; SI: s_mov_b32 m0
    482 ; SI: ds_read_u8
    483 ; SI: ds_read_u8
    484 ; SI: ds_read_u8
    485 ; SI: ds_read_u8
        ; Zero-extending <4 x i8> load from addrspace(3), stored as <4 x i32>.
        ; The checks expect one byte-sized read per vector element.
    486 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
    487 entry:
    488   %val = load <4 x i8>, <4 x i8> addrspace(3)* %in
    489   %ext = zext <4 x i8> %val to <4 x i32>
    490   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
    491   ret void
    492 }
    493 
    494 ; FUNC-LABEL: {{^}}load_v4i8_sext_local:
    495 ; R600-DAG: LDS_UBYTE_READ_RET
    496 ; R600-DAG: LDS_UBYTE_READ_RET
    497 ; R600-DAG: LDS_UBYTE_READ_RET
    498 ; R600-DAG: LDS_UBYTE_READ_RET
    499 ; R600-DAG: BFE_INT
    500 ; R600-DAG: BFE_INT
    501 ; R600-DAG: BFE_INT
    502 ; R600-DAG: BFE_INT
    503 ; SI-NOT: s_wqm_b64
    504 ; SI: s_mov_b32 m0
    505 ; SI: ds_read_i8
    506 ; SI: ds_read_i8
    507 ; SI: ds_read_i8
    508 ; SI: ds_read_i8
        ; Sign-extending <4 x i8> load from addrspace(3), stored as <4 x i32>.
        ; The checks expect one byte-sized read per vector element.
    509 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
    510 entry:
    511   %val = load <4 x i8>, <4 x i8> addrspace(3)* %in
    512   %ext = sext <4 x i8> %val to <4 x i32>
    513   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
    514   ret void
    515 }
    516 
    517 ; Zero-extending i16 load from the local address space (addrspace(3)).
        ; The loaded halfword is widened to i32 and written to %out.
    518 ; FUNC-LABEL: {{^}}load_i16_local:
    519 ; R600: LDS_USHORT_READ_RET
    520 ; SI-NOT: s_wqm_b64
    521 ; SI: s_mov_b32 m0
    522 ; SI: ds_read_u16
    523 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
    524 entry:
    525   %val = load i16, i16 addrspace(3)* %in
    526   %ext = zext i16 %val to i32
    527   store i32 %ext, i32 addrspace(1)* %out
    528   ret void
    529 }
    530 
    531 ; FUNC-LABEL: {{^}}load_i16_sext_local:
    532 ; R600: LDS_USHORT_READ_RET
    533 ; R600: BFE_INT
    534 ; SI-NOT: s_wqm_b64
    535 ; SI: s_mov_b32 m0
    536 ; SI: ds_read_i16
        ; Sign-extending i16 load from addrspace(3); the result is stored as
        ; i32 to %out in addrspace(1).
    537 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
    538 entry:
    539   %val = load i16, i16 addrspace(3)* %in
    540   %ext = sext i16 %val to i32
    541   store i32 %ext, i32 addrspace(1)* %out
    542   ret void
    543 }
    544 
    545 ; FUNC-LABEL: {{^}}load_v2i16_local:
    546 ; R600: LDS_USHORT_READ_RET
    547 ; R600: LDS_USHORT_READ_RET
    548 ; SI-NOT: s_wqm_b64
    549 ; SI: s_mov_b32 m0
    550 ; SI: ds_read_u16
    551 ; SI: ds_read_u16
        ; Zero-extending <2 x i16> load from addrspace(3), stored as <2 x i32>.
        ; The checks expect one 16-bit read per vector element.
    552 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
    553 entry:
    554   %val = load <2 x i16>, <2 x i16> addrspace(3)* %in
    555   %ext = zext <2 x i16> %val to <2 x i32>
    556   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
    557   ret void
    558 }
    559 
    560 ; FUNC-LABEL: {{^}}load_v2i16_sext_local:
    561 ; R600-DAG: LDS_USHORT_READ_RET
    562 ; R600-DAG: LDS_USHORT_READ_RET
    563 ; R600-DAG: BFE_INT
    564 ; R600-DAG: BFE_INT
    565 ; SI-NOT: s_wqm_b64
    566 ; SI: s_mov_b32 m0
    567 ; SI: ds_read_i16
    568 ; SI: ds_read_i16
        ; Sign-extending <2 x i16> load from addrspace(3), stored as <2 x i32>.
        ; The checks expect one 16-bit read per vector element.
    569 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
    570 entry:
    571   %val = load <2 x i16>, <2 x i16> addrspace(3)* %in
    572   %ext = sext <2 x i16> %val to <2 x i32>
    573   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
    574   ret void
    575 }
    576 
    577 ; FUNC-LABEL: {{^}}load_v4i16_local:
    578 ; R600: LDS_USHORT_READ_RET
    579 ; R600: LDS_USHORT_READ_RET
    580 ; R600: LDS_USHORT_READ_RET
    581 ; R600: LDS_USHORT_READ_RET
    582 ; SI-NOT: s_wqm_b64
    583 ; SI: s_mov_b32 m0
    584 ; SI: ds_read_u16
    585 ; SI: ds_read_u16
    586 ; SI: ds_read_u16
    587 ; SI: ds_read_u16
        ; Zero-extending <4 x i16> load from addrspace(3), stored as <4 x i32>.
        ; The checks expect one 16-bit read per vector element.
    588 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
    589 entry:
    590   %val = load <4 x i16>, <4 x i16> addrspace(3)* %in
    591   %ext = zext <4 x i16> %val to <4 x i32>
    592   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
    593   ret void
    594 }
    595 
    596 ; FUNC-LABEL: {{^}}load_v4i16_sext_local:
    597 ; R600-DAG: LDS_USHORT_READ_RET
    598 ; R600-DAG: LDS_USHORT_READ_RET
    599 ; R600-DAG: LDS_USHORT_READ_RET
    600 ; R600-DAG: LDS_USHORT_READ_RET
    601 ; R600-DAG: BFE_INT
    602 ; R600-DAG: BFE_INT
    603 ; R600-DAG: BFE_INT
    604 ; R600-DAG: BFE_INT
    605 ; SI-NOT: s_wqm_b64
    606 ; SI: s_mov_b32 m0
    607 ; SI: ds_read_i16
    608 ; SI: ds_read_i16
    609 ; SI: ds_read_i16
    610 ; SI: ds_read_i16
        ; Sign-extending <4 x i16> load from addrspace(3), stored as <4 x i32>.
        ; The checks expect one 16-bit read per vector element.
    611 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
    612 entry:
    613   %val = load <4 x i16>, <4 x i16> addrspace(3)* %in
    614   %ext = sext <4 x i16> %val to <4 x i32>
    615   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
    616   ret void
    617 }
    618 
    619 ; Load an i32 value from the local address space (addrspace(3)) and
        ; store it unchanged to %out in addrspace(1).
    620 ; FUNC-LABEL: {{^}}load_i32_local:
    621 ; R600: LDS_READ_RET
    622 ; SI-NOT: s_wqm_b64
    623 ; SI: s_mov_b32 m0
    624 ; SI: ds_read_b32
    625 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
    626 entry:
    627   %val = load i32, i32 addrspace(3)* %in
    628   store i32 %val, i32 addrspace(1)* %out
    629   ret void
    630 }
    631 
    632 ; Load a float value from the local address space (addrspace(3)) and
        ; store it unchanged to %out in addrspace(1).
    633 ; FUNC-LABEL: {{^}}load_f32_local:
    634 ; R600: LDS_READ_RET
    635 ; SI: s_mov_b32 m0
    636 ; SI: ds_read_b32
    637 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
    638 entry:
    639   %val = load float, float addrspace(3)* %in
    640   store float %val, float addrspace(1)* %out
    641   ret void
    642 }
    643 
    644 ; Load a <2 x float> value from the local address space (addrspace(3))
        ; and store it unchanged to %out in addrspace(1).
    645 ; FUNC-LABEL: {{^}}load_v2f32_local:
    646 ; R600: LDS_READ_RET
    647 ; R600: LDS_READ_RET
    648 ; SI: s_mov_b32 m0
    649 ; SI: ds_read_b64
    650 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
    651 entry:
    652   %val = load <2 x float>, <2 x float> addrspace(3)* %in
    653   store <2 x float> %val, <2 x float> addrspace(1)* %out
    654   ret void
    655 }
    656 
    657 ; Test loading an i32 and a <2 x i32> value from the same base pointer.
        ; The scalar is read directly from %in; the vector is read with align 4
        ; from element index 2 of %in reinterpreted as a <2 x i32> pointer. The
        ; scalar is placed in lane 0 of a zero vector and added to the vector.
    658 ; FUNC-LABEL: {{^}}load_i32_v2i32_local:
    659 ; R600: LDS_READ_RET
    660 ; R600: LDS_READ_RET
    661 ; R600: LDS_READ_RET
    662 ; SI-DAG: ds_read_b32
    663 ; SI-DAG: ds_read2_b32
    664 define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
    665   %elt = load i32, i32 addrspace(3)* %in
    666   %as_vec_ptr = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
    667   %vec_addr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %as_vec_ptr, i32 2
    668   %loaded = load <2 x i32>, <2 x i32> addrspace(3)* %vec_addr, align 4
    669   %padded = insertelement <2 x i32> <i32 0, i32 0>, i32 %elt, i32 0
    670   %sum = add <2 x i32> %loaded, %padded
    671   store <2 x i32> %sum, <2 x i32> addrspace(1)* %out
    672   ret void
    673 }
    674 
    675 
    676 @lds = addrspace(3) global [512 x i32] undef, align 4
    677 
    678 ; On SI we need to make sure that the base offset is a register and not
    679 ; an immediate.
        ; The function loads element 1 of @lds and stores it to element 1 of
        ; %out. The destination register of the ds_read_b32 is matched with a
        ; pattern rather than a fixed name, so the check depends only on the
        ; zero base register and the offset.
    680 ; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
    681 ; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
    682 ; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
    683 ; R600: LDS_READ_RET
    684 define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
    685 entry:
    686   %lds_elt = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
    687   %val = load i32, i32 addrspace(3)* %lds_elt
    688   %out_elt = getelementptr i32, i32 addrspace(1)* %out, i32 1
    689   store i32 %val, i32 addrspace(1)* %out_elt
    690   ret void
    691 }
    692