Home | History | Annotate | Download | only in R600
      1 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
      2 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
      3 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
      4 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
      5 
      6 ;===------------------------------------------------------------------------===;
      7 ; GLOBAL ADDRESS SPACE
      8 ;===------------------------------------------------------------------------===;
      9 
     10 ; Load an i8 value from the global address space.
        ; The byte is zero-extended to i32 and stored to %out; the checks look for
        ; a byte-sized load (VTX_READ_8 on R600, buffer_load_ubyte on SI).
     11 ; FUNC-LABEL: {{^}}load_i8:
     12 ; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
     13 
     14 ; SI: buffer_load_ubyte v{{[0-9]+}},
     15 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
     16   %1 = load i8, i8 addrspace(1)* %in
     17   %2 = zext i8 %1 to i32
     18   store i32 %2, i32 addrspace(1)* %out
     19   ret void
     20 }
     21 
     22 ; FUNC-LABEL: {{^}}load_i8_sext:
        ; Sign-extending i8 load from addrspace(1). The R600 checks expect the byte
        ; load followed by an LSHL/ASHR pair, each followed by the literal 24
        ; (presumably the shift amount, 32 - 8); the SI checks expect
        ; buffer_load_sbyte.
     23 ; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
     24 ; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
     25 ; R600: 24
     26 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
     27 ; R600: 24
     28 ; SI: buffer_load_sbyte
     29 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
     30 entry:
     31   %0 = load i8, i8 addrspace(1)* %in
     32   %1 = sext i8 %0 to i32
     33   store i32 %1, i32 addrspace(1)* %out
     34   ret void
     35 }
     36 
     37 ; FUNC-LABEL: {{^}}load_v2i8:
        ; Zero-extending <2 x i8> load from addrspace(1); the checks expect two
        ; byte loads on each target.
     38 ; R600: VTX_READ_8
     39 ; R600: VTX_READ_8
     40 ; SI: buffer_load_ubyte
     41 ; SI: buffer_load_ubyte
     42 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
     43 entry:
     44   %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
     45   %1 = zext <2 x i8> %0 to <2 x i32>
     46   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
     47   ret void
     48 }
     49 
     50 ; FUNC-LABEL: {{^}}load_v2i8_sext:
        ; Sign-extending <2 x i8> load from addrspace(1). The R600 checks use -DAG
        ; matching, so the two VTX_READ_8 / LSHL / ASHR groups (X and Y) may
        ; appear in any order; the SI checks expect two buffer_load_sbyte.
     51 ; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
     52 ; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
     53 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
     54 ; R600-DAG: 24
     55 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
     56 ; R600-DAG: 24
     57 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
     58 ; R600-DAG: 24
     59 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
     60 ; R600-DAG: 24
     61 ; SI: buffer_load_sbyte
     62 ; SI: buffer_load_sbyte
     63 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
     64 entry:
     65   %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
     66   %1 = sext <2 x i8> %0 to <2 x i32>
     67   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
     68   ret void
     69 }
     70 
     71 ; FUNC-LABEL: {{^}}load_v4i8:
        ; Zero-extending <4 x i8> load from addrspace(1); the checks expect four
        ; byte loads on each target.
     72 ; R600: VTX_READ_8
     73 ; R600: VTX_READ_8
     74 ; R600: VTX_READ_8
     75 ; R600: VTX_READ_8
     76 ; SI: buffer_load_ubyte
     77 ; SI: buffer_load_ubyte
     78 ; SI: buffer_load_ubyte
     79 ; SI: buffer_load_ubyte
     80 define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
     81 entry:
     82   %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
     83   %1 = zext <4 x i8> %0 to <4 x i32>
     84   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
     85   ret void
     86 }
     87 
     88 ; FUNC-LABEL: {{^}}load_v4i8_sext:
        ; Sign-extending <4 x i8> load from addrspace(1). The R600 checks use -DAG
        ; matching for four VTX_READ_8 / LSHL / ASHR groups (X, Y, Z, W), so the
        ; groups may appear in any order; the SI checks expect four
        ; buffer_load_sbyte.
     89 ; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
     90 ; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
     91 ; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
     92 ; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
     93 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
     94 ; R600-DAG: 24
     95 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
     96 ; R600-DAG: 24
     97 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
     98 ; R600-DAG: 24
     99 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
    100 ; R600-DAG: 24
    101 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
    102 ; R600-DAG: 24
    103 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
    104 ; R600-DAG: 24
    105 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
    106 ; R600-DAG: 24
    107 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
    108 ; R600-DAG: 24
    109 ; SI: buffer_load_sbyte
    110 ; SI: buffer_load_sbyte
    111 ; SI: buffer_load_sbyte
    112 ; SI: buffer_load_sbyte
    113 define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
    114 entry:
    115   %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
    116   %1 = sext <4 x i8> %0 to <4 x i32>
    117   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
    118   ret void
    119 }
    120 
    121 ; Load an i16 value from the global address space.
        ; The halfword is zero-extended to i32 and stored to %out; the checks look
        ; for a 16-bit load (VTX_READ_16 on R600, buffer_load_ushort on SI).
    122 ; FUNC-LABEL: {{^}}load_i16:
    123 ; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
    124 ; SI: buffer_load_ushort
    125 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
    126 entry:
    127   %0 = load i16, i16 addrspace(1)* %in
    128   %1 = zext i16 %0 to i32
    129   store i32 %1, i32 addrspace(1)* %out
    130   ret void
    131 }
    132 
    133 ; FUNC-LABEL: {{^}}load_i16_sext:
        ; Sign-extending i16 load from addrspace(1). The R600 checks expect the
        ; 16-bit load followed by an LSHL/ASHR pair, each followed by the literal
        ; 16 (presumably the shift amount, 32 - 16); the SI checks expect
        ; buffer_load_sshort.
    134 ; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
    135 ; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
    136 ; R600: 16
    137 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
    138 ; R600: 16
    139 ; SI: buffer_load_sshort
    140 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
    141 entry:
    142   %0 = load i16, i16 addrspace(1)* %in
    143   %1 = sext i16 %0 to i32
    144   store i32 %1, i32 addrspace(1)* %out
    145   ret void
    146 }
    147 
    148 ; FUNC-LABEL: {{^}}load_v2i16:
        ; Zero-extending <2 x i16> load from addrspace(1); the checks expect two
        ; 16-bit loads on each target.
    149 ; R600: VTX_READ_16
    150 ; R600: VTX_READ_16
    151 ; SI: buffer_load_ushort
    152 ; SI: buffer_load_ushort
    153 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
    154 entry:
    155   %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
    156   %1 = zext <2 x i16> %0 to <2 x i32>
    157   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
    158   ret void
    159 }
    160 
    161 ; FUNC-LABEL: {{^}}load_v2i16_sext:
        ; Sign-extending <2 x i16> load from addrspace(1). The R600 checks use -DAG
        ; matching, so the two VTX_READ_16 / LSHL / ASHR groups (X and Y) may
        ; appear in any order; the SI checks expect two buffer_load_sshort.
    162 ; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
    163 ; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
    164 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
    165 ; R600-DAG: 16
    166 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
    167 ; R600-DAG: 16
    168 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
    169 ; R600-DAG: 16
    170 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
    171 ; R600-DAG: 16
    172 ; SI: buffer_load_sshort
    173 ; SI: buffer_load_sshort
    174 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
    175 entry:
    176   %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
    177   %1 = sext <2 x i16> %0 to <2 x i32>
    178   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
    179   ret void
    180 }
    181 
    182 ; FUNC-LABEL: {{^}}load_v4i16:
        ; Zero-extending <4 x i16> load from addrspace(1); the checks expect four
        ; 16-bit loads on each target.
    183 ; R600: VTX_READ_16
    184 ; R600: VTX_READ_16
    185 ; R600: VTX_READ_16
    186 ; R600: VTX_READ_16
    187 ; SI: buffer_load_ushort
    188 ; SI: buffer_load_ushort
    189 ; SI: buffer_load_ushort
    190 ; SI: buffer_load_ushort
    191 define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
    192 entry:
    193   %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
    194   %1 = zext <4 x i16> %0 to <4 x i32>
    195   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
    196   ret void
    197 }
    198 
    199 ; FUNC-LABEL: {{^}}load_v4i16_sext:
        ; Sign-extending <4 x i16> load from addrspace(1). The R600 checks use -DAG
        ; matching for four VTX_READ_16 / LSHL / ASHR groups (X, Y, Z, W), so the
        ; groups may appear in any order; the SI checks expect four
        ; buffer_load_sshort.
    200 ; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
    201 ; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
    202 ; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
    203 ; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
    204 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
    205 ; R600-DAG: 16
    206 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
    207 ; R600-DAG: 16
    208 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
    209 ; R600-DAG: 16
    210 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
    211 ; R600-DAG: 16
    212 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
    213 ; R600-DAG: 16
    214 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
    215 ; R600-DAG: 16
    216 ; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
    217 ; R600-DAG: 16
    218 ; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
    219 ; R600-DAG: 16
    220 ; SI: buffer_load_sshort
    221 ; SI: buffer_load_sshort
    222 ; SI: buffer_load_sshort
    223 ; SI: buffer_load_sshort
    224 define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
    225 entry:
    226   %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
    227   %1 = sext <4 x i16> %0 to <4 x i32>
    228   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
    229   ret void
    230 }
    231 
    232 ; Load an i32 value from the global address space.
        ; The value is stored to %out unchanged; the checks look for a 32-bit load
        ; (VTX_READ_32 on R600, buffer_load_dword on SI).
    233 ; FUNC-LABEL: {{^}}load_i32:
    234 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    235 
    236 ; SI: buffer_load_dword v{{[0-9]+}}
    237 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    238 entry:
    239   %0 = load i32, i32 addrspace(1)* %in
    240   store i32 %0, i32 addrspace(1)* %out
    241   ret void
    242 }
    243 
    244 ; Load an f32 value from the global address space.
        ; The value is stored to %out unchanged; the expected instructions are the
        ; same as for the i32 case (VTX_READ_32 on R600, buffer_load_dword on SI).
    245 ; FUNC-LABEL: {{^}}load_f32:
    246 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    247 
    248 ; SI: buffer_load_dword v{{[0-9]+}}
    249 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
    250 entry:
    251   %0 = load float, float addrspace(1)* %in
    252   store float %0, float addrspace(1)* %out
    253   ret void
    254 }
    255 
    256 ; Load a v2f32 value from the global address space.
        ; The R600 checks expect MEM_RAT followed by VTX_READ_64; the SI checks
        ; expect a single buffer_load_dwordx2.
    257 ; FUNC-LABEL: {{^}}load_v2f32:
    258 ; R600: MEM_RAT
    259 ; R600: VTX_READ_64
    260 ; SI: buffer_load_dwordx2
    261 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
    262 entry:
    263   %0 = load <2 x float>, <2 x float> addrspace(1)* %in
    264   store <2 x float> %0, <2 x float> addrspace(1)* %out
    265   ret void
    266 }
    267 
    268 ; FUNC-LABEL: {{^}}load_i64:
        ; Plain i64 load from addrspace(1), stored to %out unchanged; the checks
        ; look for a 64-bit load (VTX_READ_64 on R600, buffer_load_dwordx2 on SI).
    269 ; R600: VTX_READ_64
    270 ; SI: buffer_load_dwordx2
    271 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
    272 entry:
    273   %0 = load i64, i64 addrspace(1)* %in
    274   store i64 %0, i64 addrspace(1)* %out
    275   ret void
    276 }
    277 
    278 ; FUNC-LABEL: {{^}}load_i64_sext:
        ; Loads an i32 from addrspace(1) and sign-extends it to i64 before storing.
        ; The R600 checks expect two MEM_RAT matches and an ASHR with a literal.x
        ; operand followed by the literal 31; the SI checks only require a
        ; buffer_load_dword.
    279 ; R600: MEM_RAT
    280 ; R600: MEM_RAT
    281 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
    282 ; R600: 31
    283 ; SI: buffer_load_dword
    284 
    285 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
    286 entry:
    287   %0 = load i32, i32 addrspace(1)* %in
    288   %1 = sext i32 %0 to i64
    289   store i64 %1, i64 addrspace(1)* %out
    290   ret void
    291 }
    292 
    293 ; FUNC-LABEL: {{^}}load_i64_zext:
        ; Loads an i32 from addrspace(1) and zero-extends it to i64 before storing.
        ; The R600 checks expect two MEM_RAT matches. The amdgcn runs perform the
        ; same i32 global load as the sign-extending variant, so the same
        ; buffer_load_dword is required here to keep this function covered.
    294 ; R600: MEM_RAT
    295 ; R600: MEM_RAT
        ; SI: buffer_load_dword
    296 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
    297 entry:
    298   %0 = load i32, i32 addrspace(1)* %in
    299   %1 = zext i32 %0 to i64
    300   store i64 %1, i64 addrspace(1)* %out
    301   ret void
    302 }
    303 
    304 ; FUNC-LABEL: {{^}}load_v8i32:
        ; <8 x i32> load from addrspace(1), stored to %out unchanged. The R600
        ; checks expect two VTX_READ_128; the SI checks currently expect eight
        ; separate buffer_load_dword (see the XXX note below).
    305 ; R600: VTX_READ_128
    306 ; R600: VTX_READ_128
    307 ; XXX: We should be using DWORDX4 instructions on SI.
    308 ; SI: buffer_load_dword
    309 ; SI: buffer_load_dword
    310 ; SI: buffer_load_dword
    311 ; SI: buffer_load_dword
    312 ; SI: buffer_load_dword
    313 ; SI: buffer_load_dword
    314 ; SI: buffer_load_dword
    315 ; SI: buffer_load_dword
    316 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
    317 entry:
    318   %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
    319   store <8 x i32> %0, <8 x i32> addrspace(1)* %out
    320   ret void
    321 }
    322 
    323 ; FUNC-LABEL: {{^}}load_v16i32:
        ; <16 x i32> load from addrspace(1), stored to %out unchanged. The R600
        ; checks expect four VTX_READ_128; the SI checks currently expect sixteen
        ; separate buffer_load_dword (see the XXX note below).
    324 ; R600: VTX_READ_128
    325 ; R600: VTX_READ_128
    326 ; R600: VTX_READ_128
    327 ; R600: VTX_READ_128
    328 ; XXX: We should be using DWORDX4 instructions on SI.
    329 ; SI: buffer_load_dword
    330 ; SI: buffer_load_dword
    331 ; SI: buffer_load_dword
    332 ; SI: buffer_load_dword
    333 ; SI: buffer_load_dword
    334 ; SI: buffer_load_dword
    335 ; SI: buffer_load_dword
    336 ; SI: buffer_load_dword
    337 ; SI: buffer_load_dword
    338 ; SI: buffer_load_dword
    339 ; SI: buffer_load_dword
    340 ; SI: buffer_load_dword
    341 ; SI: buffer_load_dword
    342 ; SI: buffer_load_dword
    343 ; SI: buffer_load_dword
    344 ; SI: buffer_load_dword
    345 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
    346 entry:
    347   %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
    348   store <16 x i32> %0, <16 x i32> addrspace(1)* %out
    349   ret void
    350 }
    351 
    352 ;===------------------------------------------------------------------------===;
    353 ; CONSTANT ADDRESS SPACE
    354 ;===------------------------------------------------------------------------===;
    355 
    356 ; Load a sign-extended i8 value
        ; The source pointer is in addrspace(2) (constant); the result is stored to
        ; addrspace(1). The R600 checks expect VTX_READ_8 plus an LSHL/ASHR pair,
        ; each followed by the literal 24; the SI checks expect buffer_load_sbyte.
    357 ; FUNC-LABEL: {{^}}load_const_i8_sext:
    358 ; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
    359 ; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
    360 ; R600: 24
    361 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
    362 ; R600: 24
    363 ; SI: buffer_load_sbyte v{{[0-9]+}},
    364 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
    365 entry:
    366   %0 = load i8, i8 addrspace(2)* %in
    367   %1 = sext i8 %0 to i32
    368   store i32 %1, i32 addrspace(1)* %out
    369   ret void
    370 }
    371 
    372 ; Load an aligned i8 value
        ; Loads directly through the addrspace(2) pointer argument (no offset) and
        ; zero-extends to i32; the checks look for VTX_READ_8 on R600 and
        ; buffer_load_ubyte on SI.
    373 ; FUNC-LABEL: {{^}}load_const_i8_aligned:
    374 ; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
    375 ; SI: buffer_load_ubyte v{{[0-9]+}},
    376 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
    377 entry:
    378   %0 = load i8, i8 addrspace(2)* %in
    379   %1 = zext i8 %0 to i32
    380   store i32 %1, i32 addrspace(1)* %out
    381   ret void
    382 }
    383 
    384 ; Load an un-aligned i8 value
        ; The addrspace(2) pointer is advanced by one i8 element before the load,
        ; then the byte is zero-extended to i32. The expected instructions are the
        ; same as in the aligned variant.
    385 ; FUNC-LABEL: {{^}}load_const_i8_unaligned:
    386 ; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
    387 ; SI: buffer_load_ubyte v{{[0-9]+}},
    388 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
    389 entry:
    390   %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
    391   %1 = load i8, i8 addrspace(2)* %0
    392   %2 = zext i8 %1 to i32
    393   store i32 %2, i32 addrspace(1)* %out
    394   ret void
    395 }
    396 
    397 ; Load a sign-extended i16 value
        ; The source pointer is in addrspace(2) (constant); the result is stored to
        ; addrspace(1). The R600 checks expect VTX_READ_16 plus an LSHL/ASHR pair,
        ; each followed by the literal 16; the SI checks expect buffer_load_sshort.
    398 ; FUNC-LABEL: {{^}}load_const_i16_sext:
    399 ; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
    400 ; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
    401 ; R600: 16
    402 ; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
    403 ; R600: 16
    404 ; SI: buffer_load_sshort
    405 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
    406 entry:
    407   %0 = load i16, i16 addrspace(2)* %in
    408   %1 = sext i16 %0 to i32
    409   store i32 %1, i32 addrspace(1)* %out
    410   ret void
    411 }
    412 
    413 ; Load an aligned i16 value
        ; Loads directly through the addrspace(2) pointer argument (no offset) and
        ; zero-extends to i32; the checks look for VTX_READ_16 on R600 and
        ; buffer_load_ushort on SI.
    414 ; FUNC-LABEL: {{^}}load_const_i16_aligned:
    415 ; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
    416 ; SI: buffer_load_ushort
    417 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
    418 entry:
    419   %0 = load i16, i16 addrspace(2)* %in
    420   %1 = zext i16 %0 to i32
    421   store i32 %1, i32 addrspace(1)* %out
    422   ret void
    423 }
    424 
    425 ; Load an un-aligned i16 value
        ; The addrspace(2) pointer is advanced by one i16 element before the load,
        ; then the halfword is zero-extended to i32. The expected instructions are
        ; the same as in the aligned variant.
    426 ; FUNC-LABEL: {{^}}load_const_i16_unaligned:
    427 ; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
    428 ; SI: buffer_load_ushort
    429 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
    430 entry:
    431   %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
    432   %1 = load i16, i16 addrspace(2)* %0
    433   %2 = zext i16 %1 to i32
    434   store i32 %2, i32 addrspace(1)* %out
    435   ret void
    436 }
    437 
    438 ; Load an i32 value from the constant address space.
        ; The value is read through an addrspace(2) pointer and stored to
        ; addrspace(1). R600 expects VTX_READ_32; SI expects s_load_dword with an
        ; s-register destination.
    439 ; FUNC-LABEL: {{^}}load_const_addrspace_i32:
    440 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    441 
    442 ; SI: s_load_dword s{{[0-9]+}}
    443 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
    444 entry:
    445   %0 = load i32, i32 addrspace(2)* %in
    446   store i32 %0, i32 addrspace(1)* %out
    447   ret void
    448 }
    449 
    450 ; Load an f32 value from the constant address space.
        ; The value is read through an addrspace(2) pointer and stored to
        ; addrspace(1). The expected instructions are the same as for the i32
        ; case (VTX_READ_32 on R600, s_load_dword on SI).
    451 ; FUNC-LABEL: {{^}}load_const_addrspace_f32:
    452 ; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    453 
    454 ; SI: s_load_dword s{{[0-9]+}}
    455 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
    456   %1 = load float, float addrspace(2)* %in
    457   store float %1, float addrspace(1)* %out
    458   ret void
    459 }
    460 
    461 ;===------------------------------------------------------------------------===;
    462 ; LOCAL ADDRESS SPACE
    463 ;===------------------------------------------------------------------------===;
    464 
    465 ; Load an i8 value from the local address space.
        ; The byte is read through an addrspace(3) pointer and zero-extended to
        ; i32. R600 expects LDS_UBYTE_READ_RET. The SI checks require that no
        ; s_wqm_b64 appears before the m0 setup (s_mov_b32 m0), which must be
        ; followed by ds_read_u8.
    466 ; FUNC-LABEL: {{^}}load_i8_local:
    467 ; R600: LDS_UBYTE_READ_RET
    468 ; SI-NOT: s_wqm_b64
    469 ; SI: s_mov_b32 m0
    470 ; SI: ds_read_u8
    471 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
    472   %1 = load i8, i8 addrspace(3)* %in
    473   %2 = zext i8 %1 to i32
    474   store i32 %2, i32 addrspace(1)* %out
    475   ret void
    476 }
    477 
    478 ; FUNC-LABEL: {{^}}load_i8_sext_local:
        ; Sign-extending i8 load from addrspace(3). R600 expects an unsigned byte
        ; LDS read followed by an ASHR; SI expects the m0 setup followed by the
        ; signed ds_read_i8, with no s_wqm_b64 before the m0 setup.
    479 ; R600: LDS_UBYTE_READ_RET
    480 ; R600: ASHR
    481 ; SI-NOT: s_wqm_b64
    482 ; SI: s_mov_b32 m0
    483 ; SI: ds_read_i8
    484 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
    485 entry:
    486   %0 = load i8, i8 addrspace(3)* %in
    487   %1 = sext i8 %0 to i32
    488   store i32 %1, i32 addrspace(1)* %out
    489   ret void
    490 }
    491 
    492 ; FUNC-LABEL: {{^}}load_v2i8_local:
        ; Zero-extending <2 x i8> load from addrspace(3); the checks expect two
        ; byte-sized LDS reads on each target, with the SI m0 setup matched once
        ; before them.
    493 ; R600: LDS_UBYTE_READ_RET
    494 ; R600: LDS_UBYTE_READ_RET
    495 ; SI-NOT: s_wqm_b64
    496 ; SI: s_mov_b32 m0
    497 ; SI: ds_read_u8
    498 ; SI: ds_read_u8
    499 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
    500 entry:
    501   %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
    502   %1 = zext <2 x i8> %0 to <2 x i32>
    503   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
    504   ret void
    505 }
    506 
    507 ; FUNC-LABEL: {{^}}load_v2i8_sext_local:
        ; Sign-extending <2 x i8> load from addrspace(3). The R600 checks use -DAG
        ; matching for two unsigned byte LDS reads and two ASHR in any order; the
        ; SI checks expect the m0 setup followed by two ds_read_i8.
    508 ; R600-DAG: LDS_UBYTE_READ_RET
    509 ; R600-DAG: LDS_UBYTE_READ_RET
    510 ; R600-DAG: ASHR
    511 ; R600-DAG: ASHR
    512 ; SI-NOT: s_wqm_b64
    513 ; SI: s_mov_b32 m0
    514 ; SI: ds_read_i8
    515 ; SI: ds_read_i8
    516 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
    517 entry:
    518   %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
    519   %1 = sext <2 x i8> %0 to <2 x i32>
    520   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
    521   ret void
    522 }
    523 
    524 ; FUNC-LABEL: {{^}}load_v4i8_local:
        ; Zero-extending <4 x i8> load from addrspace(3); the checks expect four
        ; byte-sized LDS reads on each target, with the SI m0 setup matched once
        ; before them.
    525 ; R600: LDS_UBYTE_READ_RET
    526 ; R600: LDS_UBYTE_READ_RET
    527 ; R600: LDS_UBYTE_READ_RET
    528 ; R600: LDS_UBYTE_READ_RET
    529 ; SI-NOT: s_wqm_b64
    530 ; SI: s_mov_b32 m0
    531 ; SI: ds_read_u8
    532 ; SI: ds_read_u8
    533 ; SI: ds_read_u8
    534 ; SI: ds_read_u8
    535 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
    536 entry:
    537   %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
    538   %1 = zext <4 x i8> %0 to <4 x i32>
    539   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
    540   ret void
    541 }
    542 
    543 ; FUNC-LABEL: {{^}}load_v4i8_sext_local:
        ; Sign-extending <4 x i8> load from addrspace(3). The R600 checks use -DAG
        ; matching for four unsigned byte LDS reads and four ASHR in any order;
        ; the SI checks expect the m0 setup followed by four ds_read_i8.
    544 ; R600-DAG: LDS_UBYTE_READ_RET
    545 ; R600-DAG: LDS_UBYTE_READ_RET
    546 ; R600-DAG: LDS_UBYTE_READ_RET
    547 ; R600-DAG: LDS_UBYTE_READ_RET
    548 ; R600-DAG: ASHR
    549 ; R600-DAG: ASHR
    550 ; R600-DAG: ASHR
    551 ; R600-DAG: ASHR
    552 ; SI-NOT: s_wqm_b64
    553 ; SI: s_mov_b32 m0
    554 ; SI: ds_read_i8
    555 ; SI: ds_read_i8
    556 ; SI: ds_read_i8
    557 ; SI: ds_read_i8
    558 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
    559 entry:
    560   %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
    561   %1 = sext <4 x i8> %0 to <4 x i32>
    562   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
    563   ret void
    564 }
    565 
    566 ; Load an i16 value from the local address space.
        ; The halfword is read through an addrspace(3) pointer and zero-extended
        ; to i32. R600 expects LDS_USHORT_READ_RET. The SI checks require that no
        ; s_wqm_b64 appears before the m0 setup (s_mov_b32 m0), which must be
        ; followed by ds_read_u16.
    567 ; FUNC-LABEL: {{^}}load_i16_local:
    568 ; R600: LDS_USHORT_READ_RET
    569 ; SI-NOT: s_wqm_b64
    570 ; SI: s_mov_b32 m0
    571 ; SI: ds_read_u16
    572 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
    573 entry:
    574   %0 = load i16, i16 addrspace(3)* %in
    575   %1 = zext i16 %0 to i32
    576   store i32 %1, i32 addrspace(1)* %out
    577   ret void
    578 }
    579 
    580 ; FUNC-LABEL: {{^}}load_i16_sext_local:
        ; Sign-extending i16 load from addrspace(3). R600 expects an unsigned
        ; 16-bit LDS read followed by an ASHR; SI expects the m0 setup followed by
        ; the signed ds_read_i16, with no s_wqm_b64 before the m0 setup.
    581 ; R600: LDS_USHORT_READ_RET
    582 ; R600: ASHR
    583 ; SI-NOT: s_wqm_b64
    584 ; SI: s_mov_b32 m0
    585 ; SI: ds_read_i16
    586 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
    587 entry:
    588   %0 = load i16, i16 addrspace(3)* %in
    589   %1 = sext i16 %0 to i32
    590   store i32 %1, i32 addrspace(1)* %out
    591   ret void
    592 }
    593 
    594 ; FUNC-LABEL: {{^}}load_v2i16_local:
        ; Zero-extending <2 x i16> load from addrspace(3); the checks expect two
        ; 16-bit LDS reads on each target, with the SI m0 setup matched once
        ; before them.
    595 ; R600: LDS_USHORT_READ_RET
    596 ; R600: LDS_USHORT_READ_RET
    597 ; SI-NOT: s_wqm_b64
    598 ; SI: s_mov_b32 m0
    599 ; SI: ds_read_u16
    600 ; SI: ds_read_u16
    601 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
    602 entry:
    603   %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
    604   %1 = zext <2 x i16> %0 to <2 x i32>
    605   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
    606   ret void
    607 }
    608 
    609 ; FUNC-LABEL: {{^}}load_v2i16_sext_local:
        ; Sign-extending <2 x i16> load from addrspace(3). The R600 checks use -DAG
        ; matching for two unsigned 16-bit LDS reads and two ASHR in any order;
        ; the SI checks expect the m0 setup followed by two ds_read_i16.
    610 ; R600-DAG: LDS_USHORT_READ_RET
    611 ; R600-DAG: LDS_USHORT_READ_RET
    612 ; R600-DAG: ASHR
    613 ; R600-DAG: ASHR
    614 ; SI-NOT: s_wqm_b64
    615 ; SI: s_mov_b32 m0
    616 ; SI: ds_read_i16
    617 ; SI: ds_read_i16
    618 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
    619 entry:
    620   %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
    621   %1 = sext <2 x i16> %0 to <2 x i32>
    622   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
    623   ret void
    624 }
    625 
    626 ; FUNC-LABEL: {{^}}load_v4i16_local:
        ; Zero-extending <4 x i16> load from addrspace(3); the checks expect four
        ; 16-bit LDS reads on each target, with the SI m0 setup matched once
        ; before them.
    627 ; R600: LDS_USHORT_READ_RET
    628 ; R600: LDS_USHORT_READ_RET
    629 ; R600: LDS_USHORT_READ_RET
    630 ; R600: LDS_USHORT_READ_RET
    631 ; SI-NOT: s_wqm_b64
    632 ; SI: s_mov_b32 m0
    633 ; SI: ds_read_u16
    634 ; SI: ds_read_u16
    635 ; SI: ds_read_u16
    636 ; SI: ds_read_u16
    637 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
    638 entry:
    639   %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
    640   %1 = zext <4 x i16> %0 to <4 x i32>
    641   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
    642   ret void
    643 }
    644 
    645 ; FUNC-LABEL: {{^}}load_v4i16_sext_local:
        ; Sign-extending <4 x i16> load from addrspace(3). The R600 checks use -DAG
        ; matching for four unsigned 16-bit LDS reads and four ASHR in any order;
        ; the SI checks expect the m0 setup followed by four ds_read_i16.
    646 ; R600-DAG: LDS_USHORT_READ_RET
    647 ; R600-DAG: LDS_USHORT_READ_RET
    648 ; R600-DAG: LDS_USHORT_READ_RET
    649 ; R600-DAG: LDS_USHORT_READ_RET
    650 ; R600-DAG: ASHR
    651 ; R600-DAG: ASHR
    652 ; R600-DAG: ASHR
    653 ; R600-DAG: ASHR
    654 ; SI-NOT: s_wqm_b64
    655 ; SI: s_mov_b32 m0
    656 ; SI: ds_read_i16
    657 ; SI: ds_read_i16
    658 ; SI: ds_read_i16
    659 ; SI: ds_read_i16
    660 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
    661 entry:
    662   %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
    663   %1 = sext <4 x i16> %0 to <4 x i32>
    664   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
    665   ret void
    666 }
    667 
    668 ; Load an i32 value from the local address space.
        ; The value is read through an addrspace(3) pointer and stored to
        ; addrspace(1) unchanged. R600 expects LDS_READ_RET; SI expects the m0
        ; setup followed by ds_read_b32, with no s_wqm_b64 before the m0 setup.
    669 ; FUNC-LABEL: {{^}}load_i32_local:
    670 ; R600: LDS_READ_RET
    671 ; SI-NOT: s_wqm_b64
    672 ; SI: s_mov_b32 m0
    673 ; SI: ds_read_b32
    674 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
    675 entry:
    676   %0 = load i32, i32 addrspace(3)* %in
    677   store i32 %0, i32 addrspace(1)* %out
    678   ret void
    679 }
    680 
    681 ; Load an f32 value from the local address space.
        ; R600 expects LDS_READ_RET; SI expects the m0 setup followed by
        ; ds_read_b32.
        ; NOTE(review): unlike the integer local-load tests in this file, there is
        ; no negative check for s_wqm_b64 here -- confirm whether that is
        ; intentional.
    682 ; FUNC-LABEL: {{^}}load_f32_local:
    683 ; R600: LDS_READ_RET
    684 ; SI: s_mov_b32 m0
    685 ; SI: ds_read_b32
    686 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
    687 entry:
    688   %0 = load float, float addrspace(3)* %in
    689   store float %0, float addrspace(1)* %out
    690   ret void
    691 }
    692 
    693 ; Load a v2f32 value from the local address space.
        ; R600 expects two LDS_READ_RET; SI expects the m0 setup followed by a
        ; single ds_read_b64.
        ; NOTE(review): unlike the integer local-load tests in this file, there is
        ; no negative check for s_wqm_b64 here -- confirm whether that is
        ; intentional.
    694 ; FUNC-LABEL: {{^}}load_v2f32_local:
    695 ; R600: LDS_READ_RET
    696 ; R600: LDS_READ_RET
    697 ; SI: s_mov_b32 m0
    698 ; SI: ds_read_b64
    699 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
    700 entry:
    701   %0 = load <2 x float>, <2 x float> addrspace(3)* %in
    702   store <2 x float> %0, <2 x float> addrspace(1)* %out
    703   ret void
    704 }
    705 
    706 ; Test loading an i32 and a v2i32 value from the same base pointer.
        ; The scalar is loaded from %in itself; the vector is loaded (align 4) from
        ; %in reinterpreted as a <2 x i32> pointer and advanced by two vector
        ; elements. The scalar is inserted into lane 0 of a zero vector and added
        ; to the loaded vector before the store. R600 expects three LDS_READ_RET;
        ; the SI checks expect ds_read_b32 and ds_read2_b32 in either order.
    707 ; FUNC-LABEL: {{^}}load_i32_v2i32_local:
    708 ; R600: LDS_READ_RET
    709 ; R600: LDS_READ_RET
    710 ; R600: LDS_READ_RET
    711 ; SI-DAG: ds_read_b32
    712 ; SI-DAG: ds_read2_b32
    713 define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
    714   %scalar = load i32, i32 addrspace(3)* %in
    715   %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
    716   %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
    717   %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
    718   %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
    719   %vec = add <2 x i32> %vec0, %vec1
    720   store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
    721   ret void
    722 }
    723 
    724 
    725 @lds = addrspace(3) global [512 x i32] undef, align 4
    726 
    727 ; On SI we need to make sure that the base offset is a register and not
    728 ; an immediate.
        ; The load reads element 1 of @lds, so the SI checks expect a zero base
        ; register (captured as ZERO) combined with offset:4. The destination
        ; VGPR of the ds_read is matched with a regex instead of being hard-coded,
        ; so the check does not depend on register allocation. %in is unused.
    729 ; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
    730 ; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
    731 ; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
    732 ; R600: LDS_READ_RET
    733 define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
    734 entry:
    735   %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
    736   %tmp1 = load i32, i32 addrspace(3)* %tmp0
    737   %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
    738   store i32 %tmp1, i32 addrspace(1)* %tmp2
    739   ret void
    740 }
    741