Home | History | Annotate | Download | only in R600
      1 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
      2 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
      3 ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
      4 
      5 ;===------------------------------------------------------------------------===;
      6 ; GLOBAL ADDRESS SPACE
      7 ;===------------------------------------------------------------------------===;
      8 
      9 ; Load an i8 value from the global address space.
     10 ; FUNC-LABEL: @load_i8
     11 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
     12 
     13 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
     14 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ; i8 load from global memory, zero-extended to i32
     15   %byte = load i8 addrspace(1)* %in
     16   %widened = zext i8 %byte to i32
     17   store i32 %widened, i32 addrspace(1)* %out
     18   ret void
     19 }
     20 
     21 ; FUNC-LABEL: @load_i8_sext
     22 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
     23 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
     24 ; R600-CHECK: 24
     25 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
     26 ; R600-CHECK: 24
     27 ; SI-CHECK: BUFFER_LOAD_SBYTE
     28 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ; i8 load from global memory, sign-extended to i32
     29 entry:
     30   %byte = load i8 addrspace(1)* %in
     31   %extended = sext i8 %byte to i32
     32   store i32 %extended, i32 addrspace(1)* %out
     33   ret void
     34 }
     35 
     36 ; FUNC-LABEL: @load_v2i8
     37 ; R600-CHECK: VTX_READ_8
     38 ; R600-CHECK: VTX_READ_8
     39 ; SI-CHECK: BUFFER_LOAD_UBYTE
     40 ; SI-CHECK: BUFFER_LOAD_UBYTE
     41 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { ; <2 x i8> global load, each lane zero-extended to i32
     42 entry:
     43   %bytes = load <2 x i8> addrspace(1)* %in
     44   %widened = zext <2 x i8> %bytes to <2 x i32>
     45   store <2 x i32> %widened, <2 x i32> addrspace(1)* %out
     46   ret void
     47 }
     48 
     49 ; FUNC-LABEL: @load_v2i8_sext
     50 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
     51 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
     52 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
     53 ; R600-CHECK-DAG: 24
     54 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
     55 ; R600-CHECK-DAG: 24
     56 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
     57 ; R600-CHECK-DAG: 24
     58 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
     59 ; R600-CHECK-DAG: 24
     60 ; SI-CHECK: BUFFER_LOAD_SBYTE
     61 ; SI-CHECK: BUFFER_LOAD_SBYTE
     62 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { ; <2 x i8> global load, each lane sign-extended to i32
     63 entry:
     64   %bytes = load <2 x i8> addrspace(1)* %in
     65   %extended = sext <2 x i8> %bytes to <2 x i32>
     66   store <2 x i32> %extended, <2 x i32> addrspace(1)* %out
     67   ret void
     68 }
     69 
     70 ; FUNC-LABEL: @load_v4i8
     71 ; R600-CHECK: VTX_READ_8
     72 ; R600-CHECK: VTX_READ_8
     73 ; R600-CHECK: VTX_READ_8
     74 ; R600-CHECK: VTX_READ_8
     75 ; SI-CHECK: BUFFER_LOAD_UBYTE
     76 ; SI-CHECK: BUFFER_LOAD_UBYTE
     77 ; SI-CHECK: BUFFER_LOAD_UBYTE
     78 ; SI-CHECK: BUFFER_LOAD_UBYTE
     79 define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { ; <4 x i8> global load, each lane zero-extended to i32
     80 entry:
     81   %bytes = load <4 x i8> addrspace(1)* %in
     82   %widened = zext <4 x i8> %bytes to <4 x i32>
     83   store <4 x i32> %widened, <4 x i32> addrspace(1)* %out
     84   ret void
     85 }
     86 
     87 ; FUNC-LABEL: @load_v4i8_sext
     88 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
     89 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
     90 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
     91 ; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
     92 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
     93 ; R600-CHECK-DAG: 24
     94 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
     95 ; R600-CHECK-DAG: 24
     96 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
     97 ; R600-CHECK-DAG: 24
     98 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
     99 ; R600-CHECK-DAG: 24
    100 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
    101 ; R600-CHECK-DAG: 24
    102 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
    103 ; R600-CHECK-DAG: 24
    104 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
    105 ; R600-CHECK-DAG: 24
    106 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
    107 ; R600-CHECK-DAG: 24
    108 ; SI-CHECK: BUFFER_LOAD_SBYTE
    109 ; SI-CHECK: BUFFER_LOAD_SBYTE
    110 ; SI-CHECK: BUFFER_LOAD_SBYTE
    111 ; SI-CHECK: BUFFER_LOAD_SBYTE
    112 define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { ; <4 x i8> global load, each lane sign-extended to i32
    113 entry:
    114   %bytes = load <4 x i8> addrspace(1)* %in
    115   %extended = sext <4 x i8> %bytes to <4 x i32>
    116   store <4 x i32> %extended, <4 x i32> addrspace(1)* %out
    117   ret void
    118 }
    119 
    120 ; Load an i16 value from the global address space.
    121 ; FUNC-LABEL: @load_i16
    122 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
    123 ; SI-CHECK: BUFFER_LOAD_USHORT
    124 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { ; i16 load from global memory, zero-extended to i32
    125 entry:
    126   %0 = load i16 addrspace(1)* %in
    127   %1 = zext i16 %0 to i32
    128   store i32 %1, i32 addrspace(1)* %out
    129   ret void
    130 }
    131 
    132 ; FUNC-LABEL: @load_i16_sext
    133 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
    134 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
    135 ; R600-CHECK: 16
    136 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
    137 ; R600-CHECK: 16
    138 ; SI-CHECK: BUFFER_LOAD_SSHORT
    139 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { ; i16 load from global memory, sign-extended to i32
    140 entry:
    141   %half = load i16 addrspace(1)* %in
    142   %extended = sext i16 %half to i32
    143   store i32 %extended, i32 addrspace(1)* %out
    144   ret void
    145 }
    146 
    147 ; FUNC-LABEL: @load_v2i16
    148 ; R600-CHECK: VTX_READ_16
    149 ; R600-CHECK: VTX_READ_16
    150 ; SI-CHECK: BUFFER_LOAD_USHORT
    151 ; SI-CHECK: BUFFER_LOAD_USHORT
    152 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { ; <2 x i16> global load, each lane zero-extended to i32
    153 entry:
    154   %halves = load <2 x i16> addrspace(1)* %in
    155   %widened = zext <2 x i16> %halves to <2 x i32>
    156   store <2 x i32> %widened, <2 x i32> addrspace(1)* %out
    157   ret void
    158 }
    159 
    160 ; FUNC-LABEL: @load_v2i16_sext
    161 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
    162 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
    163 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
    164 ; R600-CHECK-DAG: 16
    165 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
    166 ; R600-CHECK-DAG: 16
    167 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
    168 ; R600-CHECK-DAG: 16
    169 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
    170 ; R600-CHECK-DAG: 16
    171 ; SI-CHECK: BUFFER_LOAD_SSHORT
    172 ; SI-CHECK: BUFFER_LOAD_SSHORT
    173 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { ; <2 x i16> global load, each lane sign-extended to i32
    174 entry:
    175   %halves = load <2 x i16> addrspace(1)* %in
    176   %extended = sext <2 x i16> %halves to <2 x i32>
    177   store <2 x i32> %extended, <2 x i32> addrspace(1)* %out
    178   ret void
    179 }
    180 
    181 ; FUNC-LABEL: @load_v4i16
    182 ; R600-CHECK: VTX_READ_16
    183 ; R600-CHECK: VTX_READ_16
    184 ; R600-CHECK: VTX_READ_16
    185 ; R600-CHECK: VTX_READ_16
    186 ; SI-CHECK: BUFFER_LOAD_USHORT
    187 ; SI-CHECK: BUFFER_LOAD_USHORT
    188 ; SI-CHECK: BUFFER_LOAD_USHORT
    189 ; SI-CHECK: BUFFER_LOAD_USHORT
    190 define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { ; <4 x i16> global load, each lane zero-extended to i32
    191 entry:
    192   %halves = load <4 x i16> addrspace(1)* %in
    193   %widened = zext <4 x i16> %halves to <4 x i32>
    194   store <4 x i32> %widened, <4 x i32> addrspace(1)* %out
    195   ret void
    196 }
    197 
    198 ; FUNC-LABEL: @load_v4i16_sext
    199 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
    200 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
    201 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
    202 ; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
    203 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
    204 ; R600-CHECK-DAG: 16
    205 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
    206 ; R600-CHECK-DAG: 16
    207 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
    208 ; R600-CHECK-DAG: 16
    209 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
    210 ; R600-CHECK-DAG: 16
    211 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
    212 ; R600-CHECK-DAG: 16
    213 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
    214 ; R600-CHECK-DAG: 16
    215 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
    216 ; R600-CHECK-DAG: 16
    217 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
    218 ; R600-CHECK-DAG: 16
    219 ; SI-CHECK: BUFFER_LOAD_SSHORT
    220 ; SI-CHECK: BUFFER_LOAD_SSHORT
    221 ; SI-CHECK: BUFFER_LOAD_SSHORT
    222 ; SI-CHECK: BUFFER_LOAD_SSHORT
    223 define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { ; <4 x i16> global load, each lane sign-extended to i32
    224 entry:
    225   %halves = load <4 x i16> addrspace(1)* %in
    226   %extended = sext <4 x i16> %halves to <4 x i32>
    227   store <4 x i32> %extended, <4 x i32> addrspace(1)* %out
    228   ret void
    229 }
    230 
    231 ; Load an i32 value from the global address space.
    232 ; FUNC-LABEL: @load_i32
    233 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    234 
    235 ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
    236 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; plain i32 copy: global load, global store
    237 entry:
    238   %word = load i32 addrspace(1)* %in
    239   store i32 %word, i32 addrspace(1)* %out
    240   ret void
    241 }
    242 
    243 ; Load an f32 value from the global address space.
    244 ; FUNC-LABEL: @load_f32
    245 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    246 
    247 ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
    248 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) { ; plain float copy: global load, global store
    249 entry:
    250   %value = load float addrspace(1)* %in
    251   store float %value, float addrspace(1)* %out
    252   ret void
    253 }
    254 
    255 ; Load a v2f32 value from the global address space.
    256 ; FUNC-LABEL: @load_v2f32
    257 ; R600-CHECK: VTX_READ_64
    258 
    259 ; SI-CHECK: BUFFER_LOAD_DWORDX2
    260 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { ; <2 x float> copy: global load, global store
    261 entry:
    262   %pair = load <2 x float> addrspace(1)* %in
    263   store <2 x float> %pair, <2 x float> addrspace(1)* %out
    264   ret void
    265 }
    266 
    267 ; FUNC-LABEL: @load_i64
    268 ; R600-CHECK: MEM_RAT
    269 ; R600-CHECK: MEM_RAT
    270 
    271 ; SI-CHECK: BUFFER_LOAD_DWORDX2
    272 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; plain i64 copy: global load, global store
    273 entry:
    274   %value = load i64 addrspace(1)* %in
    275   store i64 %value, i64 addrspace(1)* %out
    276   ret void
    277 }
    278 
    279 ; FUNC-LABEL: @load_i64_sext
    280 ; R600-CHECK: MEM_RAT
    281 ; R600-CHECK: MEM_RAT
    282 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
    283 ; R600-CHECK: 31
    284 ; SI-CHECK: BUFFER_LOAD_DWORD
    285 
    286 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 global load, sign-extended to i64 before the store
    287 entry:
    288   %word = load i32 addrspace(1)* %in         ; the memory read is only 32 bits wide
    289   %extended = sext i32 %word to i64          ; the i64 in the name comes from this extension
    290   store i64 %extended, i64 addrspace(1)* %out
    291   ret void
    292 }
    293 
    294 ; FUNC-LABEL: @load_i64_zext
    295 ; R600-CHECK: MEM_RAT
    296 ; R600-CHECK: MEM_RAT
        ; SI-CHECK: BUFFER_LOAD_DWORD
    297 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 global load, zero-extended to i64 before the store
    298 entry:
    299   %0 = load i32 addrspace(1)* %in            ; the memory read is only 32 bits wide
    300   %1 = zext i32 %0 to i64                    ; the i64 in the name comes from this extension
    301   store i64 %1, i64 addrspace(1)* %out
    302   ret void
    303 }
    304 
    305 ; FUNC-LABEL: @load_v8i32
    306 ; R600-CHECK: VTX_READ_128
    307 ; R600-CHECK: VTX_READ_128
    308 ; XXX: We should be using DWORDX4 instructions on SI.
    309 ; SI-CHECK: BUFFER_LOAD_DWORD
    310 ; SI-CHECK: BUFFER_LOAD_DWORD
    311 ; SI-CHECK: BUFFER_LOAD_DWORD
    312 ; SI-CHECK: BUFFER_LOAD_DWORD
    313 ; SI-CHECK: BUFFER_LOAD_DWORD
    314 ; SI-CHECK: BUFFER_LOAD_DWORD
    315 ; SI-CHECK: BUFFER_LOAD_DWORD
    316 ; SI-CHECK: BUFFER_LOAD_DWORD
    317 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) { ; <8 x i32> copy: global load, global store
    318 entry:
    319   %vec = load <8 x i32> addrspace(1)* %in
    320   store <8 x i32> %vec, <8 x i32> addrspace(1)* %out
    321   ret void
    322 }
    323 
    324 ; FUNC-LABEL: @load_v16i32
    325 ; R600-CHECK: VTX_READ_128
    326 ; R600-CHECK: VTX_READ_128
    327 ; R600-CHECK: VTX_READ_128
    328 ; R600-CHECK: VTX_READ_128
    329 ; XXX: We should be using DWORDX4 instructions on SI.
    330 ; SI-CHECK: BUFFER_LOAD_DWORD
    331 ; SI-CHECK: BUFFER_LOAD_DWORD
    332 ; SI-CHECK: BUFFER_LOAD_DWORD
    333 ; SI-CHECK: BUFFER_LOAD_DWORD
    334 ; SI-CHECK: BUFFER_LOAD_DWORD
    335 ; SI-CHECK: BUFFER_LOAD_DWORD
    336 ; SI-CHECK: BUFFER_LOAD_DWORD
    337 ; SI-CHECK: BUFFER_LOAD_DWORD
    338 ; SI-CHECK: BUFFER_LOAD_DWORD
    339 ; SI-CHECK: BUFFER_LOAD_DWORD
    340 ; SI-CHECK: BUFFER_LOAD_DWORD
    341 ; SI-CHECK: BUFFER_LOAD_DWORD
    342 ; SI-CHECK: BUFFER_LOAD_DWORD
    343 ; SI-CHECK: BUFFER_LOAD_DWORD
    344 ; SI-CHECK: BUFFER_LOAD_DWORD
    345 ; SI-CHECK: BUFFER_LOAD_DWORD
    346 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) { ; <16 x i32> copy: global load, global store
    347 entry:
    348   %vec = load <16 x i32> addrspace(1)* %in
    349   store <16 x i32> %vec, <16 x i32> addrspace(1)* %out
    350   ret void
    351 }
    352 
    353 ;===------------------------------------------------------------------------===;
    354 ; CONSTANT ADDRESS SPACE
    355 ;===------------------------------------------------------------------------===;
    356 
    357 ; Load a sign-extended i8 value
    358 ; FUNC-LABEL: @load_const_i8_sext
    359 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
    360 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
    361 ; R600-CHECK: 24
    362 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
    363 ; R600-CHECK: 24
    364 ; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
    365 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { ; i8 load from constant memory, sign-extended, stored to global
    366 entry:
    367   %byte = load i8 addrspace(2)* %in
    368   %extended = sext i8 %byte to i32
    369   store i32 %extended, i32 addrspace(1)* %out
    370   ret void
    371 }
    372 
    373 ; Load an aligned i8 value
    374 ; FUNC-LABEL: @load_const_i8_aligned
    375 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
    376 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
    377 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { ; i8 load straight from %in (constant memory), zero-extended
    378 entry:
    379   %byte = load i8 addrspace(2)* %in
    380   %widened = zext i8 %byte to i32
    381   store i32 %widened, i32 addrspace(1)* %out
    382   ret void
    383 }
    384 
    385 ; Load an un-aligned i8 value
    386 ; FUNC-LABEL: @load_const_i8_unaligned
    387 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
    388 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
    389 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { ; i8 load from one element past %in (constant memory), zero-extended
    390 entry:
    391   %ptr = getelementptr i8 addrspace(2)* %in, i32 1   ; element index 1, i.e. one byte past %in
    392   %byte = load i8 addrspace(2)* %ptr
    393   %widened = zext i8 %byte to i32
    394   store i32 %widened, i32 addrspace(1)* %out
    395   ret void
    396 }
    397 
    398 ; Load a sign-extended i16 value
    399 ; FUNC-LABEL: @load_const_i16_sext
    400 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
    401 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
    402 ; R600-CHECK: 16
    403 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
    404 ; R600-CHECK: 16
    405 ; SI-CHECK: BUFFER_LOAD_SSHORT
    406 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { ; i16 load from constant memory, sign-extended, stored to global
    407 entry:
    408   %half = load i16 addrspace(2)* %in
    409   %extended = sext i16 %half to i32
    410   store i32 %extended, i32 addrspace(1)* %out
    411   ret void
    412 }
    413 
    414 ; Load an aligned i16 value
    415 ; FUNC-LABEL: @load_const_i16_aligned
    416 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
    417 ; SI-CHECK: BUFFER_LOAD_USHORT
    418 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { ; i16 load straight from %in (constant memory), zero-extended
    419 entry:
    420   %half = load i16 addrspace(2)* %in
    421   %widened = zext i16 %half to i32
    422   store i32 %widened, i32 addrspace(1)* %out
    423   ret void
    424 }
    425 
    426 ; Load an un-aligned i16 value
    427 ; FUNC-LABEL: @load_const_i16_unaligned
    428 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
    429 ; SI-CHECK: BUFFER_LOAD_USHORT
    430 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { ; i16 load from one element past %in (constant memory), zero-extended
    431 entry:
    432   %ptr = getelementptr i16 addrspace(2)* %in, i32 1  ; element index 1, i.e. one i16 past %in
    433   %half = load i16 addrspace(2)* %ptr
    434   %widened = zext i16 %half to i32
    435   store i32 %widened, i32 addrspace(1)* %out
    436   ret void
    437 }
    438 
    439 ; Load an i32 value from the constant address space.
    440 ; FUNC-LABEL: @load_const_addrspace_i32
    441 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    442 
    443 ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
    444 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { ; i32 copy: constant-memory load, global store
    445 entry:
    446   %word = load i32 addrspace(2)* %in
    447   store i32 %word, i32 addrspace(1)* %out
    448   ret void
    449 }
    450 
    451 ; Load an f32 value from the constant address space.
    452 ; FUNC-LABEL: @load_const_addrspace_f32
    453 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
    454 
    455 ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
    456 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) { ; float copy: constant-memory load, global store
    457   %value = load float addrspace(2)* %in
    458   store float %value, float addrspace(1)* %out
    459   ret void
    460 }
    461 
    462 ;===------------------------------------------------------------------------===;
    463 ; LOCAL ADDRESS SPACE
    464 ;===------------------------------------------------------------------------===;
    465 
    466 ; Load an i8 value from the local address space.
    467 ; FUNC-LABEL: @load_i8_local
    468 ; R600-CHECK: LDS_UBYTE_READ_RET
    469 ; SI-CHECK-NOT: S_WQM_B64
    470 ; SI-CHECK: S_MOV_B32 m0
    471 ; SI-CHECK: DS_READ_U8
    472 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { ; i8 load from local memory, zero-extended, stored to global
    473   %byte = load i8 addrspace(3)* %in
    474   %widened = zext i8 %byte to i32
    475   store i32 %widened, i32 addrspace(1)* %out
    476   ret void
    477 }
    478 
    479 ; FUNC-LABEL: @load_i8_sext_local
    480 ; R600-CHECK: LDS_UBYTE_READ_RET
    481 ; R600-CHECK: ASHR
    482 ; SI-CHECK-NOT: S_WQM_B64
    483 ; SI-CHECK: S_MOV_B32 m0
    484 ; SI-CHECK: DS_READ_I8
    485 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { ; i8 load from local memory, sign-extended, stored to global
    486 entry:
    487   %byte = load i8 addrspace(3)* %in
    488   %extended = sext i8 %byte to i32
    489   store i32 %extended, i32 addrspace(1)* %out
    490   ret void
    491 }
    492 
    493 ; FUNC-LABEL: @load_v2i8_local
    494 ; R600-CHECK: LDS_UBYTE_READ_RET
    495 ; R600-CHECK: LDS_UBYTE_READ_RET
    496 ; SI-CHECK-NOT: S_WQM_B64
    497 ; SI-CHECK: S_MOV_B32 m0
    498 ; SI-CHECK: DS_READ_U8
    499 ; SI-CHECK: DS_READ_U8
    500 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { ; <2 x i8> local load, each lane zero-extended to i32
    501 entry:
    502   %bytes = load <2 x i8> addrspace(3)* %in
    503   %widened = zext <2 x i8> %bytes to <2 x i32>
    504   store <2 x i32> %widened, <2 x i32> addrspace(1)* %out
    505   ret void
    506 }
    507 
    508 ; FUNC-LABEL: @load_v2i8_sext_local
    509 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
    510 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
    511 ; R600-CHECK-DAG: ASHR
    512 ; R600-CHECK-DAG: ASHR
    513 ; SI-CHECK-NOT: S_WQM_B64
    514 ; SI-CHECK: S_MOV_B32 m0
    515 ; SI-CHECK: DS_READ_I8
    516 ; SI-CHECK: DS_READ_I8
    517 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { ; <2 x i8> local load, each lane sign-extended to i32
    518 entry:
    519   %bytes = load <2 x i8> addrspace(3)* %in
    520   %extended = sext <2 x i8> %bytes to <2 x i32>
    521   store <2 x i32> %extended, <2 x i32> addrspace(1)* %out
    522   ret void
    523 }
    524 
    525 ; FUNC-LABEL: @load_v4i8_local
    526 ; R600-CHECK: LDS_UBYTE_READ_RET
    527 ; R600-CHECK: LDS_UBYTE_READ_RET
    528 ; R600-CHECK: LDS_UBYTE_READ_RET
    529 ; R600-CHECK: LDS_UBYTE_READ_RET
    530 ; SI-CHECK-NOT: S_WQM_B64
    531 ; SI-CHECK: S_MOV_B32 m0
    532 ; SI-CHECK: DS_READ_U8
    533 ; SI-CHECK: DS_READ_U8
    534 ; SI-CHECK: DS_READ_U8
    535 ; SI-CHECK: DS_READ_U8
    536 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) { ; <4 x i8> local load, each lane zero-extended to i32
    537 entry:
    538   %bytes = load <4 x i8> addrspace(3)* %in
    539   %widened = zext <4 x i8> %bytes to <4 x i32>
    540   store <4 x i32> %widened, <4 x i32> addrspace(1)* %out
    541   ret void
    542 }
    543 
    544 ; FUNC-LABEL: @load_v4i8_sext_local
    545 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
    546 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
    547 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
    548 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
    549 ; R600-CHECK-DAG: ASHR
    550 ; R600-CHECK-DAG: ASHR
    551 ; R600-CHECK-DAG: ASHR
    552 ; R600-CHECK-DAG: ASHR
    553 ; SI-CHECK-NOT: S_WQM_B64
    554 ; SI-CHECK: S_MOV_B32 m0
    555 ; SI-CHECK: DS_READ_I8
    556 ; SI-CHECK: DS_READ_I8
    557 ; SI-CHECK: DS_READ_I8
    558 ; SI-CHECK: DS_READ_I8
    559 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) { ; <4 x i8> local load, each lane sign-extended to i32
    560 entry:
    561   %bytes = load <4 x i8> addrspace(3)* %in
    562   %extended = sext <4 x i8> %bytes to <4 x i32>
    563   store <4 x i32> %extended, <4 x i32> addrspace(1)* %out
    564   ret void
    565 }
    566 
    567 ; Load an i16 value from the local address space.
    568 ; FUNC-LABEL: @load_i16_local
    569 ; R600-CHECK: LDS_USHORT_READ_RET
    570 ; SI-CHECK-NOT: S_WQM_B64
    571 ; SI-CHECK: S_MOV_B32 m0
    572 ; SI-CHECK: DS_READ_U16
    573 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { ; i16 load from local memory, zero-extended, stored to global
    574 entry:
    575   %0 = load i16 addrspace(3)* %in
    576   %1 = zext i16 %0 to i32
    577   store i32 %1, i32 addrspace(1)* %out
    578   ret void
    579 }
    580 
    581 ; FUNC-LABEL: @load_i16_sext_local
    582 ; R600-CHECK: LDS_USHORT_READ_RET
    583 ; R600-CHECK: ASHR
    584 ; SI-CHECK-NOT: S_WQM_B64
    585 ; SI-CHECK: S_MOV_B32 m0
    586 ; SI-CHECK: DS_READ_I16
    587 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { ; i16 load from local memory, sign-extended, stored to global
    588 entry:
    589   %half = load i16 addrspace(3)* %in
    590   %extended = sext i16 %half to i32
    591   store i32 %extended, i32 addrspace(1)* %out
    592   ret void
    593 }
    594 
    595 ; FUNC-LABEL: @load_v2i16_local
    596 ; R600-CHECK: LDS_USHORT_READ_RET
    597 ; R600-CHECK: LDS_USHORT_READ_RET
    598 ; SI-CHECK-NOT: S_WQM_B64
    599 ; SI-CHECK: S_MOV_B32 m0
    600 ; SI-CHECK: DS_READ_U16
    601 ; SI-CHECK: DS_READ_U16
    602 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { ; <2 x i16> local load, each lane zero-extended to i32
    603 entry:
    604   %halves = load <2 x i16> addrspace(3)* %in
    605   %widened = zext <2 x i16> %halves to <2 x i32>
    606   store <2 x i32> %widened, <2 x i32> addrspace(1)* %out
    607   ret void
    608 }
    609 
    610 ; FUNC-LABEL: @load_v2i16_sext_local
    611 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
    612 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
    613 ; R600-CHECK-DAG: ASHR
    614 ; R600-CHECK-DAG: ASHR
    615 ; SI-CHECK-NOT: S_WQM_B64
    616 ; SI-CHECK: S_MOV_B32 m0
    617 ; SI-CHECK: DS_READ_I16
    618 ; SI-CHECK: DS_READ_I16
    619 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { ; <2 x i16> local load, each lane sign-extended to i32
    620 entry:
    621   %halves = load <2 x i16> addrspace(3)* %in
    622   %extended = sext <2 x i16> %halves to <2 x i32>
    623   store <2 x i32> %extended, <2 x i32> addrspace(1)* %out
    624   ret void
    625 }
    626 
    627 ; FUNC-LABEL: @load_v4i16_local
    628 ; R600-CHECK: LDS_USHORT_READ_RET
    629 ; R600-CHECK: LDS_USHORT_READ_RET
    630 ; R600-CHECK: LDS_USHORT_READ_RET
    631 ; R600-CHECK: LDS_USHORT_READ_RET
    632 ; SI-CHECK-NOT: S_WQM_B64
    633 ; SI-CHECK: S_MOV_B32 m0
    634 ; SI-CHECK: DS_READ_U16
    635 ; SI-CHECK: DS_READ_U16
    636 ; SI-CHECK: DS_READ_U16
    637 ; SI-CHECK: DS_READ_U16
    638 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) { ; <4 x i16> local load, each lane zero-extended to i32
    639 entry:
    640   %halves = load <4 x i16> addrspace(3)* %in
    641   %widened = zext <4 x i16> %halves to <4 x i32>
    642   store <4 x i32> %widened, <4 x i32> addrspace(1)* %out
    643   ret void
    644 }
    645 
    646 ; FUNC-LABEL: @load_v4i16_sext_local
    647 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
    648 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
    649 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
    650 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
    651 ; R600-CHECK-DAG: ASHR
    652 ; R600-CHECK-DAG: ASHR
    653 ; R600-CHECK-DAG: ASHR
    654 ; R600-CHECK-DAG: ASHR
    655 ; SI-CHECK-NOT: S_WQM_B64
    656 ; SI-CHECK: S_MOV_B32 m0
    657 ; SI-CHECK: DS_READ_I16
    658 ; SI-CHECK: DS_READ_I16
    659 ; SI-CHECK: DS_READ_I16
    660 ; SI-CHECK: DS_READ_I16
    661 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) { ; <4 x i16> local load, each lane sign-extended to i32
    662 entry:
    663   %halves = load <4 x i16> addrspace(3)* %in
    664   %extended = sext <4 x i16> %halves to <4 x i32>
    665   store <4 x i32> %extended, <4 x i32> addrspace(1)* %out
    666   ret void
    667 }
    668 
    669 ; Load an i32 value from the local address space.
    670 ; FUNC-LABEL: @load_i32_local
    671 ; R600-CHECK: LDS_READ_RET
    672 ; SI-CHECK-NOT: S_WQM_B64
    673 ; SI-CHECK: S_MOV_B32 m0
    674 ; SI-CHECK: DS_READ_B32
    675 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { ; i32 copy: local-memory load, global store
    676 entry:
    677   %word = load i32 addrspace(3)* %in
    678   store i32 %word, i32 addrspace(1)* %out
    679   ret void
    680 }
    681 
    682 ; Load an f32 value from the local address space.
    683 ; FUNC-LABEL: @load_f32_local
    684 ; R600-CHECK: LDS_READ_RET
    685 ; SI-CHECK: S_MOV_B32 m0
    686 ; SI-CHECK: DS_READ_B32
    687 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) { ; float copy: local-memory load, global store
    688 entry:
    689   %value = load float addrspace(3)* %in      ; NOTE(review): unlike the integer local-load tests, the SI expectations here do not rule out S_WQM_B64
    690   store float %value, float addrspace(1)* %out
    691   ret void
    692 }
    693 
    694 ; Load a v2f32 value from the local address space.
    695 ; FUNC-LABEL: @load_v2f32_local
    696 ; R600-CHECK: LDS_READ_RET
    697 ; R600-CHECK: LDS_READ_RET
    698 ; SI-CHECK: S_MOV_B32 m0
    699 ; SI-CHECK: DS_READ_B64
    700 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) { ; <2 x float> copy: local-memory load, global store
    701 entry:
    702   %pair = load <2 x float> addrspace(3)* %in ; NOTE(review): unlike the integer local-load tests, the SI expectations here do not rule out S_WQM_B64
    703   store <2 x float> %pair, <2 x float> addrspace(1)* %out
    704   ret void
    705 }
    706