Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
      2 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
      3 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
      4 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
      5 
      6 ; FUNC-LABEL: {{^}}i8_arg:
      7 ; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
      8 ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
      9 ; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
     10 ; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
     11 
        ; @i8_arg zero-extends the plain (no zeroext/signext attribute) i8
        ; argument %in to i32 and stores it through %out.
        ; The directives above expect the amdgcn runs to load one dword
        ; (offset 0xb by default, 0x2c with -mcpu=tonga) and mask it with 0xff,
        ; and the r600 runs to apply AND_INT to the KC0[2].Z operand.
     12 define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
     13 entry:
     14   %0 = zext i8 %in to i32
     15   store i32 %0, i32 addrspace(1)* %out, align 4
     16   ret void
     17 }
     18 
     19 ; FUNC-LABEL: {{^}}i8_zext_arg:
     20 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
     21 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     22 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
     23 
        ; @i8_zext_arg takes an i8 argument marked zeroext, zero-extends it to
        ; i32 and stores it through %out.
        ; The directives above list only a dword load (amdgcn) or a MOV from
        ; KC0[2].Z (r600); they do not mention a separate masking instruction.
     24 define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
     25 entry:
     26   %0 = zext i8 %in to i32
     27   store i32 %0, i32 addrspace(1)* %out, align 4
     28   ret void
     29 }
     30 
     31 ; FUNC-LABEL: {{^}}i8_sext_arg:
     32 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
     33 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     34 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
     35 
        ; @i8_sext_arg takes an i8 argument marked signext, sign-extends it to
        ; i32 and stores it through %out.
        ; The directives above list only a dword load (amdgcn) or a MOV from
        ; KC0[2].Z (r600); no explicit extension instruction is listed.
     36 define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
     37 entry:
     38   %0 = sext i8 %in to i32
     39   store i32 %0, i32 addrspace(1)* %out, align 4
     40   ret void
     41 }
     42 
     43 ; FUNC-LABEL: {{^}}i16_arg:
     44 ; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
     45 ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
     46 ; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
     47 ; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff
     48 
        ; @i16_arg zero-extends the plain (no zeroext/signext attribute) i16
        ; argument %in to i32 and stores it through %out.
        ; The directives above expect the amdgcn runs to load one dword
        ; (offset 0xb by default, 0x2c with -mcpu=tonga) and mask it, and the
        ; r600 runs to apply AND_INT to the KC0[2].Z operand.
        ; The mask pattern is the full 16-bit value 0xffff: FileCheck matches
        ; substrings, so the shorter 0xff pattern used for the i8 test would
        ; also match here without actually pinning the i16 mask.
     49 define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
     50 entry:
     51   %0 = zext i16 %in to i32
     52   store i32 %0, i32 addrspace(1)* %out, align 4
     53   ret void
     54 }
     55 
     56 ; FUNC-LABEL: {{^}}i16_zext_arg:
     57 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
     58 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     59 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
     60 
        ; @i16_zext_arg takes an i16 argument marked zeroext, zero-extends it
        ; to i32 and stores it through %out.
        ; The directives above list only a dword load (amdgcn) or a MOV from
        ; KC0[2].Z (r600); they do not mention a separate masking instruction.
     61 define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
     62 entry:
     63   %0 = zext i16 %in to i32
     64   store i32 %0, i32 addrspace(1)* %out, align 4
     65   ret void
     66 }
     67 
     68 ; FUNC-LABEL: {{^}}i16_sext_arg:
     69 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
     70 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     71 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
     72 
        ; @i16_sext_arg takes an i16 argument marked signext, sign-extends it
        ; to i32 and stores it through %out.
        ; The directives above list only a dword load (amdgcn) or a MOV from
        ; KC0[2].Z (r600); no explicit extension instruction is listed.
     73 define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
     74 entry:
     75   %0 = sext i16 %in to i32
     76   store i32 %0, i32 addrspace(1)* %out, align 4
     77   ret void
     78 }
     79 
     80 ; FUNC-LABEL: {{^}}i32_arg:
     81 ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
     82 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     83 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
        ; @i32_arg stores the i32 argument %in unchanged through %out.
        ; The directives above expect a single dword load from s[0:1] at offset
        ; 0xb (default amdgcn) or 0x2c (-mcpu=tonga), and a KC0[2].Z source
        ; operand on r600.
     84 define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
     85 entry:
     86   store i32 %in, i32 addrspace(1)* %out, align 4
     87   ret void
     88 }
     89 
     90 ; FUNC-LABEL: {{^}}f32_arg:
     91 ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
     92 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     93 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
        ; @f32_arg stores the float argument %in unchanged through %out.
        ; The expected instructions are the same as those listed for @i32_arg:
        ; one dword load at offset 0xb / 0x2c, or a KC0[2].Z operand on r600.
     94 define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
     95 entry:
     96   store float %in, float addrspace(1)* %out, align 4
     97   ret void
     98 }
     99 
    100 ; FUNC-LABEL: {{^}}v2i8_arg:
    101 ; EG: VTX_READ_8
    102 ; EG: VTX_READ_8
    103 ; GCN: buffer_load_ubyte
    104 ; GCN: buffer_load_ubyte
        ; @v2i8_arg stores the <2 x i8> argument %in through %out.
        ; The directives above expect one byte-sized load per element:
        ; two VTX_READ_8 on r600 and two buffer_load_ubyte on amdgcn.
    105 define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
    106 entry:
    107   store <2 x i8> %in, <2 x i8> addrspace(1)* %out
    108   ret void
    109 }
    110 
    111 ; FUNC-LABEL: {{^}}v2i16_arg:
    112 ; EG: VTX_READ_16
    113 ; EG: VTX_READ_16
    114 ; GCN-DAG: buffer_load_ushort
    115 ; GCN-DAG: buffer_load_ushort
        ; @v2i16_arg stores the <2 x i16> argument %in through %out.
        ; The directives above expect one 16-bit load per element: two
        ; VTX_READ_16 on r600 and two buffer_load_ushort on amdgcn (the amdgcn
        ; pair uses the DAG form, so their relative order is not fixed).
    116 define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
    117 entry:
    118   store <2 x i16> %in, <2 x i16> addrspace(1)* %out
    119   ret void
    120 }
    121 
    122 ; FUNC-LABEL: {{^}}v2i32_arg:
    123 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
    124 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
    125 ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
    126 ; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
        ; @v2i32_arg stores the <2 x i32> argument %in through %out.
        ; The directives above expect a single s_load_dwordx2 at offset 0xb
        ; (default amdgcn) or 0x2c (-mcpu=tonga), and on r600 the operands
        ; KC0[2].W and KC0[3].X in either order.
    127 define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
    128 entry:
    129   store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
    130   ret void
    131 }
    132 
    133 ; FUNC-LABEL: {{^}}v2f32_arg:
    134 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
    135 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
    136 ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
    137 ; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
        ; @v2f32_arg stores the <2 x float> argument %in through %out.
        ; The expected instructions are the same as those listed for
        ; @v2i32_arg: one s_load_dwordx2 at offset 0xb / 0x2c, or the
        ; KC0[2].W and KC0[3].X operands on r600.
    138 define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
    139 entry:
    140   store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
    141   ret void
    142 }
    143 
    144 ; FUNC-LABEL: {{^}}v3i8_arg:
    145 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
    146 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
    147 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
        ; @v3i8_arg stores the <3 x i8> argument %in through %out.
        ; NOTE(review): the three VTX_READ_8 lines above carry no check prefix,
        ; so FileCheck treats them as ordinary comments; only the label
        ; directive is active for this function. Whether the listed offsets
        ; (40, 41, 42) still match the generated code is not verified here.
    148 define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
    149 entry:
    150   store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
    151   ret void
    152 }
    153 
    154 ; FUNC-LABEL: {{^}}v3i16_arg:
    155 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
    156 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
    157 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
        ; @v3i16_arg stores the <3 x i16> argument %in through %out.
        ; NOTE(review): the three VTX_READ_16 lines above carry no check
        ; prefix, so FileCheck treats them as ordinary comments; only the label
        ; directive is active for this function. Whether the listed offsets
        ; (44, 46, 48) still match the generated code is not verified here.
    158 define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
    159 entry:
    160   store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
    161   ret void
    162 }
    163 ; FUNC-LABEL: {{^}}v3i32_arg:
    164 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
    165 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
    166 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
    167 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
    168 ; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
        ; @v3i32_arg stores the <3 x i32> argument %in through %out.
        ; The directives above expect a four-dword load (s_load_dwordx4) at
        ; offset 0xd (default amdgcn) or 0x34 (-mcpu=tonga), and on r600 the
        ; operands KC0[3].Y, KC0[3].Z and KC0[3].W in any order.
    169 define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
    170 entry:
    171   store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
    172   ret void
    173 }
    174 
    175 ; FUNC-LABEL: {{^}}v3f32_arg:
    176 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
    177 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
    178 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
    179 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
    180 ; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
        ; @v3f32_arg stores the <3 x float> argument %in through %out.
        ; The expected instructions are the same as those listed for
        ; @v3i32_arg: one s_load_dwordx4 at offset 0xd / 0x34, or the three
        ; KC0[3] operands (Y, Z, W) on r600.
    181 define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
    182 entry:
    183   store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
    184   ret void
    185 }
    186 
    187 ; FUNC-LABEL: {{^}}v4i8_arg:
    188 ; EG: VTX_READ_8
    189 ; EG: VTX_READ_8
    190 ; EG: VTX_READ_8
    191 ; EG: VTX_READ_8
    192 ; GCN: buffer_load_ubyte
    193 ; GCN: buffer_load_ubyte
    194 ; GCN: buffer_load_ubyte
    195 ; GCN: buffer_load_ubyte
        ; @v4i8_arg stores the <4 x i8> argument %in through %out.
        ; The directives above expect one byte-sized load per element:
        ; four VTX_READ_8 on r600 and four buffer_load_ubyte on amdgcn.
    196 define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
    197 entry:
    198   store <4 x i8> %in, <4 x i8> addrspace(1)* %out
    199   ret void
    200 }
    201 
    202 ; FUNC-LABEL: {{^}}v4i16_arg:
    203 ; EG: VTX_READ_16
    204 ; EG: VTX_READ_16
    205 ; EG: VTX_READ_16
    206 ; EG: VTX_READ_16
    207 ; GCN: buffer_load_ushort
    208 ; GCN: buffer_load_ushort
    209 ; GCN: buffer_load_ushort
    210 ; GCN: buffer_load_ushort
        ; @v4i16_arg stores the <4 x i16> argument %in through %out.
        ; The directives above expect one 16-bit load per element:
        ; four VTX_READ_16 on r600 and four buffer_load_ushort on amdgcn.
    211 define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
    212 entry:
    213   store <4 x i16> %in, <4 x i16> addrspace(1)* %out
    214   ret void
    215 }
    216 
    217 ; FUNC-LABEL: {{^}}v4i32_arg:
    218 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
    219 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
    220 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
    221 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
    222 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
    223 ; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
        ; @v4i32_arg stores the <4 x i32> argument %in through %out.
        ; The directives above expect one s_load_dwordx4 at offset 0xd
        ; (default amdgcn) or 0x34 (-mcpu=tonga), and on r600 the operands
        ; KC0[3].Y through KC0[4].X in any order.
    224 define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
    225 entry:
    226   store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
    227   ret void
    228 }
    229 
    230 ; FUNC-LABEL: {{^}}v4f32_arg:
    231 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
    232 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
    233 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
    234 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
    235 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
    236 ; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
        ; @v4f32_arg stores the <4 x float> argument %in through %out.
        ; The expected instructions are the same as those listed for
        ; @v4i32_arg: one s_load_dwordx4 at offset 0xd / 0x34, or the
        ; KC0[3].Y through KC0[4].X operands on r600.
    237 define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
    238 entry:
    239   store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
    240   ret void
    241 }
    242 
    243 ; FUNC-LABEL: {{^}}v8i8_arg:
    244 ; EG: VTX_READ_8
    245 ; EG: VTX_READ_8
    246 ; EG: VTX_READ_8
    247 ; EG: VTX_READ_8
    248 ; EG: VTX_READ_8
    249 ; EG: VTX_READ_8
    250 ; EG: VTX_READ_8
    251 ; EG: VTX_READ_8
    252 ; GCN: buffer_load_ubyte
    253 ; GCN: buffer_load_ubyte
    254 ; GCN: buffer_load_ubyte
    255 ; GCN: buffer_load_ubyte
    256 ; GCN: buffer_load_ubyte
    257 ; GCN: buffer_load_ubyte
    258 ; GCN: buffer_load_ubyte
        ; GCN: buffer_load_ubyte
        ; @v8i8_arg stores the <8 x i8> argument %in through %out.
        ; The directives above expect one byte-sized load per element: eight
        ; VTX_READ_8 on r600 and eight buffer_load_ubyte on amdgcn. The amdgcn
        ; list previously had only seven entries; the eighth brings it in line
        ; with the element count and with the v4i8 / v16i8 tests in this file.
    259 define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
    260 entry:
    261   store <8 x i8> %in, <8 x i8> addrspace(1)* %out
    262   ret void
    263 }
    264 
    265 ; FUNC-LABEL: {{^}}v8i16_arg:
    266 ; EG: VTX_READ_16
    267 ; EG: VTX_READ_16
    268 ; EG: VTX_READ_16
    269 ; EG: VTX_READ_16
    270 ; EG: VTX_READ_16
    271 ; EG: VTX_READ_16
    272 ; EG: VTX_READ_16
    273 ; EG: VTX_READ_16
    274 ; GCN: buffer_load_ushort
    275 ; GCN: buffer_load_ushort
    276 ; GCN: buffer_load_ushort
    277 ; GCN: buffer_load_ushort
    278 ; GCN: buffer_load_ushort
    279 ; GCN: buffer_load_ushort
    280 ; GCN: buffer_load_ushort
    281 ; GCN: buffer_load_ushort
        ; @v8i16_arg stores the <8 x i16> argument %in through %out.
        ; The directives above expect one 16-bit load per element:
        ; eight VTX_READ_16 on r600 and eight buffer_load_ushort on amdgcn.
    282 define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
    283 entry:
    284   store <8 x i16> %in, <8 x i16> addrspace(1)* %out
    285   ret void
    286 }
    287 
    288 ; FUNC-LABEL: {{^}}v8i32_arg:
    289 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
    290 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
    291 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
    292 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
    293 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
    294 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
    295 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
    296 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
    297 ; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
    298 ; VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
        ; @v8i32_arg stores the <8 x i32> argument %in through %out.
        ; The directives above expect one s_load_dwordx8 at offset 0x11
        ; (default amdgcn) or 0x44 (-mcpu=tonga), and on r600 the eight
        ; operands KC0[4].Y through KC0[6].X in any order.
    299 define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
    300 entry:
    301   store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
    302   ret void
    303 }
    304 
    305 ; FUNC-LABEL: {{^}}v8f32_arg:
    306 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
    307 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
    308 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
    309 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
    310 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
    311 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
    312 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
    313 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
    314 ; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
        ; VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
        ; @v8f32_arg stores the <8 x float> argument %in through %out.
        ; The directives above expect one s_load_dwordx8 at offset 0x11
        ; (default amdgcn) or 0x44 (-mcpu=tonga), and on r600 the eight
        ; operands KC0[4].Y through KC0[6].X in any order. The tonga directive
        ; mirrors the one on @v8i32_arg; this function previously had none, so
        ; the tonga run checked only the label here.
    315 define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
    316 entry:
    317   store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
    318   ret void
    319 }
    320 
    321 ; FUNC-LABEL: {{^}}v16i8_arg:
    322 ; EG: VTX_READ_8
    323 ; EG: VTX_READ_8
    324 ; EG: VTX_READ_8
    325 ; EG: VTX_READ_8
    326 ; EG: VTX_READ_8
    327 ; EG: VTX_READ_8
    328 ; EG: VTX_READ_8
    329 ; EG: VTX_READ_8
    330 ; EG: VTX_READ_8
    331 ; EG: VTX_READ_8
    332 ; EG: VTX_READ_8
    333 ; EG: VTX_READ_8
    334 ; EG: VTX_READ_8
    335 ; EG: VTX_READ_8
    336 ; EG: VTX_READ_8
    337 ; EG: VTX_READ_8
    338 ; GCN: buffer_load_ubyte
    339 ; GCN: buffer_load_ubyte
    340 ; GCN: buffer_load_ubyte
    341 ; GCN: buffer_load_ubyte
    342 ; GCN: buffer_load_ubyte
    343 ; GCN: buffer_load_ubyte
    344 ; GCN: buffer_load_ubyte
    345 ; GCN: buffer_load_ubyte
    346 ; GCN: buffer_load_ubyte
    347 ; GCN: buffer_load_ubyte
    348 ; GCN: buffer_load_ubyte
    349 ; GCN: buffer_load_ubyte
    350 ; GCN: buffer_load_ubyte
    351 ; GCN: buffer_load_ubyte
    352 ; GCN: buffer_load_ubyte
    353 ; GCN: buffer_load_ubyte
        ; @v16i8_arg stores the <16 x i8> argument %in through %out.
        ; The directives above expect one byte-sized load per element:
        ; sixteen VTX_READ_8 on r600 and sixteen buffer_load_ubyte on amdgcn.
    354 define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
    355 entry:
    356   store <16 x i8> %in, <16 x i8> addrspace(1)* %out
    357   ret void
    358 }
    359 
    360 ; FUNC-LABEL: {{^}}v16i16_arg:
    361 ; EG: VTX_READ_16
    362 ; EG: VTX_READ_16
    363 ; EG: VTX_READ_16
    364 ; EG: VTX_READ_16
    365 ; EG: VTX_READ_16
    366 ; EG: VTX_READ_16
    367 ; EG: VTX_READ_16
    368 ; EG: VTX_READ_16
    369 ; EG: VTX_READ_16
    370 ; EG: VTX_READ_16
    371 ; EG: VTX_READ_16
    372 ; EG: VTX_READ_16
    373 ; EG: VTX_READ_16
    374 ; EG: VTX_READ_16
    375 ; EG: VTX_READ_16
    376 ; EG: VTX_READ_16
    377 ; GCN: buffer_load_ushort
    378 ; GCN: buffer_load_ushort
    379 ; GCN: buffer_load_ushort
    380 ; GCN: buffer_load_ushort
    381 ; GCN: buffer_load_ushort
    382 ; GCN: buffer_load_ushort
    383 ; GCN: buffer_load_ushort
    384 ; GCN: buffer_load_ushort
    385 ; GCN: buffer_load_ushort
    386 ; GCN: buffer_load_ushort
    387 ; GCN: buffer_load_ushort
    388 ; GCN: buffer_load_ushort
    389 ; GCN: buffer_load_ushort
    390 ; GCN: buffer_load_ushort
    391 ; GCN: buffer_load_ushort
    392 ; GCN: buffer_load_ushort
        ; @v16i16_arg stores the <16 x i16> argument %in through %out.
        ; The directives above expect one 16-bit load per element:
        ; sixteen VTX_READ_16 on r600 and sixteen buffer_load_ushort on amdgcn.
    393 define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
    394 entry:
    395   store <16 x i16> %in, <16 x i16> addrspace(1)* %out
    396   ret void
    397 }
    398 
    399 ; FUNC-LABEL: {{^}}v16i32_arg:
    400 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
    401 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
    402 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
    403 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
    404 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
    405 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
    406 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
    407 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
    408 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
    409 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
    410 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
    411 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
    412 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
    413 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
    414 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
    415 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
    416 ; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
    417 ; VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
        ; @v16i32_arg stores the <16 x i32> argument %in through %out.
        ; The directives above expect one s_load_dwordx16 at offset 0x19
        ; (default amdgcn) or 0x64 (-mcpu=tonga), and on r600 the sixteen
        ; operands KC0[6].Y through KC0[10].X in any order.
    418 define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
    419 entry:
    420   store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
    421   ret void
    422 }
    423 
    424 ; FUNC-LABEL: {{^}}v16f32_arg:
    425 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
    426 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
    427 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
    428 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
    429 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
    430 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
    431 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
    432 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
    433 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
    434 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
    435 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
    436 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
    437 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
    438 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
    439 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
    440 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
    441 ; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
    442 ; VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
        ; @v16f32_arg stores the <16 x float> argument %in through %out.
        ; The expected instructions are the same as those listed for
        ; @v16i32_arg: one s_load_dwordx16 at offset 0x19 / 0x64, or the
        ; sixteen operands KC0[6].Y through KC0[10].X on r600.
    443 define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
    444 entry:
    445   store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
    446   ret void
    447 }
    448 
    449 ; FUNC-LABEL: {{^}}kernel_arg_i64:
    450 ; GCN: s_load_dwordx2
    451 ; GCN: s_load_dwordx2
    452 ; GCN: buffer_store_dwordx2
        ; @kernel_arg_i64 stores the i64 argument %a through %out.
        ; The directives above apply to both amdgcn runs and expect two
        ; s_load_dwordx2 followed by one buffer_store_dwordx2; operands and
        ; offsets are not pinned. There are no r600 directives besides the
        ; label.
    453 define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
    454   store i64 %a, i64 addrspace(1)* %out, align 8
    455   ret void
    456 }
    457 
    458 ; FUNC-LABEL: {{^}}f64_kernel_arg:
    459 ; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
    460 ; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
    461 ; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
    462 ; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
    463 ; GCN: buffer_store_dwordx2
        ; @f64_kernel_arg stores the double argument %in through %out.
        ; The directives above expect two s_load_dwordx2 from s[0:1], in either
        ; order, at offsets 0x9 and 0xb (default amdgcn) or 0x24 and 0x2c
        ; (-mcpu=tonga), followed by one buffer_store_dwordx2.
    464 define void @f64_kernel_arg(double addrspace(1)* %out, double  %in) {
    465 entry:
    466   store double %in, double addrspace(1)* %out
    467   ret void
    468 }
    469 
    470 ; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
    471 ; XGCN: s_load_dwordx2
    472 ; XGCN: s_load_dwordx2
    473 ; XGCN: buffer_store_dwordx2
    474 ; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
    475 ;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
    476 ;   ret void
    477 ; }
    478 
    479 ; FUNC-LABEL: {{^}}i1_arg:
    480 ; SI: buffer_load_ubyte
    481 ; SI: v_and_b32_e32
    482 ; SI: buffer_store_byte
    483 ; SI: s_endpgm
        ; @i1_arg stores the i1 argument %x through %out.
        ; Apart from the label, the directives above use the prefix that only
        ; the first run line (amdgcn without -mcpu) enables. They expect a byte
        ; load, a v_and_b32_e32, a byte store and s_endpgm, in that order.
    484 define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
    485   store i1 %x, i1 addrspace(1)* %out, align 1
    486   ret void
    487 }
    488 
    489 ; FUNC-LABEL: {{^}}i1_arg_zext_i32:
    490 ; SI: buffer_load_ubyte
    491 ; SI: buffer_store_dword
    492 ; SI: s_endpgm
        ; @i1_arg_zext_i32 zero-extends the i1 argument %x to i32 and stores it
        ; through %out.
        ; Apart from the label, the directives above use the prefix that only
        ; the first run line (amdgcn without -mcpu) enables. They expect a byte
        ; load, a dword store and s_endpgm, in that order.
    493 define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
    494   %ext = zext i1 %x to i32
    495   store i32 %ext, i32 addrspace(1)* %out, align 4
    496   ret void
    497 }
    498 
    499 ; FUNC-LABEL: {{^}}i1_arg_zext_i64:
    500 ; SI: buffer_load_ubyte
    501 ; SI: buffer_store_dwordx2
    502 ; SI: s_endpgm
        ; @i1_arg_zext_i64 zero-extends the i1 argument %x to i64 and stores it
        ; through %out.
        ; Apart from the label, the directives above use the prefix that only
        ; the first run line (amdgcn without -mcpu) enables. They expect a byte
        ; load, a two-dword store and s_endpgm, in that order.
    503 define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
    504   %ext = zext i1 %x to i64
    505   store i64 %ext, i64 addrspace(1)* %out, align 8
    506   ret void
    507 }
    508 
    509 ; FUNC-LABEL: {{^}}i1_arg_sext_i32:
    510 ; SI: buffer_load_ubyte
    511 ; SI: buffer_store_dword
    512 ; SI: s_endpgm
        ; @i1_arg_sext_i32 sign-extends the i1 argument %x to i32 and stores it
        ; through %out.
        ; Apart from the label, the directives above use the prefix that only
        ; the first run line (amdgcn without -mcpu) enables. They expect a byte
        ; load, a dword store and s_endpgm, in that order.
        ; The store's pointer type is written "i32 addrspace(1)*" with a space,
        ; matching every other store in this file; the earlier run-together
        ; spelling depended on how the IR lexer happens to split the token.
    513 define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
    514   %ext = sext i1 %x to i32
    515   store i32 %ext, i32 addrspace(1)* %out, align 4
    516   ret void
    517 }
    518 
    519 ; FUNC-LABEL: {{^}}i1_arg_sext_i64:
    520 ; SI: buffer_load_ubyte
    521 ; SI: v_bfe_i32
    522 ; SI: v_ashrrev_i32
    523 ; SI: buffer_store_dwordx2
    524 ; SI: s_endpgm
        ; @i1_arg_sext_i64 sign-extends the i1 argument %x to i64 and stores it
        ; through %out.
        ; Apart from the label, the directives above use the prefix that only
        ; the first run line (amdgcn without -mcpu) enables. They expect a byte
        ; load, v_bfe_i32, v_ashrrev_i32, a two-dword store and s_endpgm, in
        ; that order.
    525 define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
    526   %ext = sext i1 %x to i64
    527   store i64 %ext, i64 addrspace(1)* %out, align 8
    528   ret void
    529 }
    530