Home | History | Annotate | Download | only in R600
      1 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
      2 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
      3 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
      4 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
      5 
      6 ; FUNC-LABEL: {{^}}i8_arg:
      7 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
      8 ; GCN: buffer_load_ubyte
      9 
        ; Test function taking an i8 with no extension attribute. The body
        ; zero-extends %in to i32 and stores it to %out. The GCN runs expect a
        ; byte load (buffer_load_ubyte); the EG runs expect a MOV from KC0[2].Z.
     10 define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
     11 entry:
     12   %0 = zext i8 %in to i32
     13   store i32 %0, i32 addrspace(1)* %out, align 4
     14   ret void
     15 }
     16 
     17 ; FUNC-LABEL: {{^}}i8_zext_arg:
     18 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
     19 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     20 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
     21 
        ; Test function taking an i8 marked zeroext. The body zero-extends %in
        ; to i32 and stores it to %out. Unlike the unattributed i8 case, the SI
        ; and VI runs expect a full s_load_dword (offset 0xb on SI, 0x2c on VI).
        ; NOTE(review): 0x2c is 4 * 0xb, so the two offsets presumably name the
        ; same location in dword vs. byte units - confirm.
     22 define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
     23 entry:
     24   %0 = zext i8 %in to i32
     25   store i32 %0, i32 addrspace(1)* %out, align 4
     26   ret void
     27 }
     28 
     29 ; FUNC-LABEL: {{^}}i8_sext_arg:
     30 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
     31 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     32 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
     33 
        ; Test function taking an i8 marked signext. The body sign-extends %in
        ; to i32 and stores it to %out. The SI and VI runs expect a full
        ; s_load_dword (offset 0xb on SI, 0x2c on VI); the EG runs expect a MOV
        ; from KC0[2].Z.
     34 define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
     35 entry:
     36   %0 = sext i8 %in to i32
     37   store i32 %0, i32 addrspace(1)* %out, align 4
     38   ret void
     39 }
     40 
     41 ; FUNC-LABEL: {{^}}i16_arg:
     42 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
     43 ; GCN: buffer_load_ushort
     44 
        ; Test function taking an i16 with no extension attribute. The body
        ; zero-extends %in to i32 and stores it to %out. The GCN runs expect a
        ; 16-bit load (buffer_load_ushort); the EG runs a MOV from KC0[2].Z.
     45 define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
     46 entry:
     47   %0 = zext i16 %in to i32
     48   store i32 %0, i32 addrspace(1)* %out, align 4
     49   ret void
     50 }
     51 
     52 ; FUNC-LABEL: {{^}}i16_zext_arg:
     53 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
     54 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     55 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
     56 
        ; Test function taking an i16 marked zeroext. The body zero-extends %in
        ; to i32 and stores it to %out. The SI and VI runs expect a full
        ; s_load_dword (offset 0xb on SI, 0x2c on VI) rather than a short load.
     57 define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
     58 entry:
     59   %0 = zext i16 %in to i32
     60   store i32 %0, i32 addrspace(1)* %out, align 4
     61   ret void
     62 }
     63 
     64 ; FUNC-LABEL: {{^}}i16_sext_arg:
     65 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
     66 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     67 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
     68 
        ; Test function taking an i16 marked signext. The body sign-extends %in
        ; to i32 and stores it to %out. The SI and VI runs expect a full
        ; s_load_dword (offset 0xb on SI, 0x2c on VI); the EG runs expect a MOV
        ; from KC0[2].Z.
     69 define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
     70 entry:
     71   %0 = sext i16 %in to i32
     72   store i32 %0, i32 addrspace(1)* %out, align 4
     73   ret void
     74 }
     75 
     76 ; FUNC-LABEL: {{^}}i32_arg:
     77 ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
     78 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     79 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
        ; Stores the i32 argument %in unchanged to %out. The EG runs expect a
        ; read of KC0[2].Z; the SI and VI runs expect one s_load_dword from
        ; s[0:1] at offset 0xb and 0x2c respectively.
     80 define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
     81 entry:
     82   store i32 %in, i32 addrspace(1)* %out, align 4
     83   ret void
     84 }
     85 
     86 ; FUNC-LABEL: {{^}}f32_arg:
     87 ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
     88 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
     89 ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
        ; Float counterpart of i32_arg. Stores the float argument %in unchanged
        ; to %out; the expected loads are the same as for the i32 case
        ; (KC0[2].Z on EG, s_load_dword at 0xb on SI and 0x2c on VI).
     90 define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
     91 entry:
     92   store float %in, float addrspace(1)* %out, align 4
     93   ret void
     94 }
     95 
     96 ; FUNC-LABEL: {{^}}v2i8_arg:
     97 ; EG: VTX_READ_8
     98 ; EG: VTX_READ_8
     99 ; GCN: buffer_load_ubyte
    100 ; GCN: buffer_load_ubyte
        ; Stores the <2 x i8> argument %in unchanged to %out (no explicit
        ; alignment on the store). One byte-sized read per element is expected
        ; on both target families (VTX_READ_8 on EG, buffer_load_ubyte on GCN).
    101 define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
    102 entry:
    103   store <2 x i8> %in, <2 x i8> addrspace(1)* %out
    104   ret void
    105 }
    106 
    107 ; FUNC-LABEL: {{^}}v2i16_arg:
    108 ; EG: VTX_READ_16
    109 ; EG: VTX_READ_16
    110 ; GCN-DAG: buffer_load_ushort
    111 ; GCN-DAG: buffer_load_ushort
        ; Stores the <2 x i16> argument %in unchanged to %out (no explicit
        ; alignment on the store). One 16-bit read per element is expected
        ; (VTX_READ_16 on EG, buffer_load_ushort on GCN). The GCN checks use the
        ; -DAG form, so the two loads may appear in either order.
    112 define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
    113 entry:
    114   store <2 x i16> %in, <2 x i16> addrspace(1)* %out
    115   ret void
    116 }
    117 
    118 ; FUNC-LABEL: {{^}}v2i32_arg:
    119 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
    120 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
    121 ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
    122 ; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
        ; Stores the <2 x i32> argument %in unchanged to %out. The EG runs
        ; expect reads of KC0[2].W and KC0[3].X in any order; the SI and VI runs
        ; expect a single s_load_dwordx2 at offset 0xb and 0x2c respectively.
    123 define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
    124 entry:
    125   store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
    126   ret void
    127 }
    128 
    129 ; FUNC-LABEL: {{^}}v2f32_arg:
    130 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
    131 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
    132 ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
    133 ; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
        ; Float counterpart of v2i32_arg. Stores the <2 x float> argument %in
        ; unchanged to %out, with the same expected loads as the <2 x i32> case.
    134 define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
    135 entry:
    136   store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
    137   ret void
    138 }
    139 
    140 ; FUNC-LABEL: {{^}}v3i8_arg:
    141 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
    142 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
    143 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
        ; Stores the <3 x i8> argument %in unchanged to %out.
        ; NOTE(review): the three VTX_READ_8 lines above carry none of the check
        ; prefixes named on the RUN lines, so they are presumably plain comments
        ; and only the function label is verified here - confirm whether they
        ; were meant to be enabled (and for which targets) before relying on
        ; this test for coverage.
    144 define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
    145 entry:
    146   store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
    147   ret void
    148 }
    149 
    150 ; FUNC-LABEL: {{^}}v3i16_arg:
    151 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
    152 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
    153 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
        ; Stores the <3 x i16> argument %in unchanged to %out.
        ; NOTE(review): the three VTX_READ_16 lines above carry none of the
        ; check prefixes named on the RUN lines, so they are presumably plain
        ; comments and only the function label is verified here - confirm
        ; whether they were meant to be enabled, and whether the listed offsets
        ; (44, 46, 48) are still the intended ones.
    154 define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
    155 entry:
    156   store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
    157   ret void
    158 }
    159 ; FUNC-LABEL: {{^}}v3i32_arg:
    160 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
    161 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
    162 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
    163 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
    164 ; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
        ; Stores the <3 x i32> argument %in unchanged to %out. The EG runs
        ; expect three reads (KC0[3].Y, .Z and .W, any order). The SI and VI
        ; runs expect a four-dword s_load_dwordx4 (offset 0xd on SI, 0x34 on
        ; VI), i.e. the same instruction and offsets checked for v4i32_arg.
    165 define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
    166 entry:
    167   store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
    168   ret void
    169 }
    170 
    171 ; FUNC-LABEL: {{^}}v3f32_arg:
    172 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
    173 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
    174 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
    175 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
    176 ; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
        ; Float counterpart of v3i32_arg. Stores the <3 x float> argument %in
        ; unchanged to %out, with the same expected loads as the <3 x i32> case.
    177 define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
    178 entry:
    179   store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
    180   ret void
    181 }
    182 
    183 ; FUNC-LABEL: {{^}}v4i8_arg:
    184 ; EG: VTX_READ_8
    185 ; EG: VTX_READ_8
    186 ; EG: VTX_READ_8
    187 ; EG: VTX_READ_8
    188 ; GCN: buffer_load_ubyte
    189 ; GCN: buffer_load_ubyte
    190 ; GCN: buffer_load_ubyte
    191 ; GCN: buffer_load_ubyte
        ; Stores the <4 x i8> argument %in unchanged to %out (no explicit
        ; alignment on the store). Four byte-sized reads, one per element, are
        ; expected on both target families.
    192 define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
    193 entry:
    194   store <4 x i8> %in, <4 x i8> addrspace(1)* %out
    195   ret void
    196 }
    197 
    198 ; FUNC-LABEL: {{^}}v4i16_arg:
    199 ; EG: VTX_READ_16
    200 ; EG: VTX_READ_16
    201 ; EG: VTX_READ_16
    202 ; EG: VTX_READ_16
    203 ; GCN: buffer_load_ushort
    204 ; GCN: buffer_load_ushort
    205 ; GCN: buffer_load_ushort
    206 ; GCN: buffer_load_ushort
        ; Stores the <4 x i16> argument %in unchanged to %out (no explicit
        ; alignment on the store). Four 16-bit reads, one per element, are
        ; expected on both target families.
    207 define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
    208 entry:
    209   store <4 x i16> %in, <4 x i16> addrspace(1)* %out
    210   ret void
    211 }
    212 
    213 ; FUNC-LABEL: {{^}}v4i32_arg:
    214 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
    215 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
    216 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
    217 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
    218 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
    219 ; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
        ; Stores the <4 x i32> argument %in unchanged to %out. The EG runs
        ; expect four reads (KC0[3].Y through KC0[4].X, any order); the SI and
        ; VI runs expect one s_load_dwordx4 at offset 0xd and 0x34 respectively.
    220 define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
    221 entry:
    222   store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
    223   ret void
    224 }
    225 
    226 ; FUNC-LABEL: {{^}}v4f32_arg:
    227 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
    228 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
    229 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
    230 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
    231 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
    232 ; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
        ; Float counterpart of v4i32_arg. Stores the <4 x float> argument %in
        ; unchanged to %out, with the same expected loads as the <4 x i32> case.
    233 define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
    234 entry:
    235   store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
    236   ret void
    237 }
    238 
    239 ; FUNC-LABEL: {{^}}v8i8_arg:
    240 ; EG: VTX_READ_8
    241 ; EG: VTX_READ_8
    242 ; EG: VTX_READ_8
    243 ; EG: VTX_READ_8
    244 ; EG: VTX_READ_8
    245 ; EG: VTX_READ_8
    246 ; EG: VTX_READ_8
    247 ; EG: VTX_READ_8
    248 ; GCN: buffer_load_ubyte
    249 ; GCN: buffer_load_ubyte
    250 ; GCN: buffer_load_ubyte
    251 ; GCN: buffer_load_ubyte
    252 ; GCN: buffer_load_ubyte
    253 ; GCN: buffer_load_ubyte
    254 ; GCN: buffer_load_ubyte
        ; GCN: buffer_load_ubyte
        ; Stores the <8 x i8> argument %in unchanged to %out (no explicit
        ; alignment on the store). Eight byte-sized reads, one per element, are
        ; expected on both target families. The eighth buffer_load_ubyte check
        ; was added so the GCN count matches the element count, as it does in
        ; the <4 x i8> and <16 x i8> tests.
    255 define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
    256 entry:
    257   store <8 x i8> %in, <8 x i8> addrspace(1)* %out
    258   ret void
    259 }
    260 
    261 ; FUNC-LABEL: {{^}}v8i16_arg:
    262 ; EG: VTX_READ_16
    263 ; EG: VTX_READ_16
    264 ; EG: VTX_READ_16
    265 ; EG: VTX_READ_16
    266 ; EG: VTX_READ_16
    267 ; EG: VTX_READ_16
    268 ; EG: VTX_READ_16
    269 ; EG: VTX_READ_16
    270 ; GCN: buffer_load_ushort
    271 ; GCN: buffer_load_ushort
    272 ; GCN: buffer_load_ushort
    273 ; GCN: buffer_load_ushort
    274 ; GCN: buffer_load_ushort
    275 ; GCN: buffer_load_ushort
    276 ; GCN: buffer_load_ushort
    277 ; GCN: buffer_load_ushort
        ; Stores the <8 x i16> argument %in unchanged to %out (no explicit
        ; alignment on the store). Eight 16-bit reads, one per element, are
        ; expected on both target families.
    278 define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
    279 entry:
    280   store <8 x i16> %in, <8 x i16> addrspace(1)* %out
    281   ret void
    282 }
    283 
    284 ; FUNC-LABEL: {{^}}v8i32_arg:
    285 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
    286 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
    287 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
    288 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
    289 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
    290 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
    291 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
    292 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
    293 ; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
    294 ; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
        ; Stores the <8 x i32> argument %in unchanged to %out. The EG runs
        ; expect eight reads (KC0[4].Y through KC0[6].X, any order); the SI and
        ; VI runs expect one s_load_dwordx8 at offset 0x11 and 0x44 respectively.
    295 define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
    296 entry:
    297   store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
    298   ret void
    299 }
    300 
    301 ; FUNC-LABEL: {{^}}v8f32_arg:
    302 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
    303 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
    304 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
    305 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
    306 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
    307 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
    308 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
    309 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
    310 ; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
        ; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
        ; Float counterpart of v8i32_arg. Stores the <8 x float> argument %in
        ; unchanged to %out. The tonga check was added to mirror the one on
        ; v8i32_arg, which has the same argument layout and the same SI offset;
        ; without it the tonga run verified only the function label here.
    311 define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
    312 entry:
    313   store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
    314   ret void
    315 }
    316 
    317 ; FUNC-LABEL: {{^}}v16i8_arg:
    318 ; EG: VTX_READ_8
    319 ; EG: VTX_READ_8
    320 ; EG: VTX_READ_8
    321 ; EG: VTX_READ_8
    322 ; EG: VTX_READ_8
    323 ; EG: VTX_READ_8
    324 ; EG: VTX_READ_8
    325 ; EG: VTX_READ_8
    326 ; EG: VTX_READ_8
    327 ; EG: VTX_READ_8
    328 ; EG: VTX_READ_8
    329 ; EG: VTX_READ_8
    330 ; EG: VTX_READ_8
    331 ; EG: VTX_READ_8
    332 ; EG: VTX_READ_8
    333 ; EG: VTX_READ_8
    334 ; GCN: buffer_load_ubyte
    335 ; GCN: buffer_load_ubyte
    336 ; GCN: buffer_load_ubyte
    337 ; GCN: buffer_load_ubyte
    338 ; GCN: buffer_load_ubyte
    339 ; GCN: buffer_load_ubyte
    340 ; GCN: buffer_load_ubyte
    341 ; GCN: buffer_load_ubyte
    342 ; GCN: buffer_load_ubyte
    343 ; GCN: buffer_load_ubyte
    344 ; GCN: buffer_load_ubyte
    345 ; GCN: buffer_load_ubyte
    346 ; GCN: buffer_load_ubyte
    347 ; GCN: buffer_load_ubyte
    348 ; GCN: buffer_load_ubyte
    349 ; GCN: buffer_load_ubyte
        ; Stores the <16 x i8> argument %in unchanged to %out (no explicit
        ; alignment on the store). Sixteen byte-sized reads, one per element,
        ; are expected on both target families.
    350 define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
    351 entry:
    352   store <16 x i8> %in, <16 x i8> addrspace(1)* %out
    353   ret void
    354 }
    355 
    356 ; FUNC-LABEL: {{^}}v16i16_arg:
    357 ; EG: VTX_READ_16
    358 ; EG: VTX_READ_16
    359 ; EG: VTX_READ_16
    360 ; EG: VTX_READ_16
    361 ; EG: VTX_READ_16
    362 ; EG: VTX_READ_16
    363 ; EG: VTX_READ_16
    364 ; EG: VTX_READ_16
    365 ; EG: VTX_READ_16
    366 ; EG: VTX_READ_16
    367 ; EG: VTX_READ_16
    368 ; EG: VTX_READ_16
    369 ; EG: VTX_READ_16
    370 ; EG: VTX_READ_16
    371 ; EG: VTX_READ_16
    372 ; EG: VTX_READ_16
    373 ; GCN: buffer_load_ushort
    374 ; GCN: buffer_load_ushort
    375 ; GCN: buffer_load_ushort
    376 ; GCN: buffer_load_ushort
    377 ; GCN: buffer_load_ushort
    378 ; GCN: buffer_load_ushort
    379 ; GCN: buffer_load_ushort
    380 ; GCN: buffer_load_ushort
    381 ; GCN: buffer_load_ushort
    382 ; GCN: buffer_load_ushort
    383 ; GCN: buffer_load_ushort
    384 ; GCN: buffer_load_ushort
    385 ; GCN: buffer_load_ushort
    386 ; GCN: buffer_load_ushort
    387 ; GCN: buffer_load_ushort
    388 ; GCN: buffer_load_ushort
        ; Stores the <16 x i16> argument %in unchanged to %out (no explicit
        ; alignment on the store). Sixteen 16-bit reads, one per element, are
        ; expected on both target families.
    389 define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
    390 entry:
    391   store <16 x i16> %in, <16 x i16> addrspace(1)* %out
    392   ret void
    393 }
    394 
    395 ; FUNC-LABEL: {{^}}v16i32_arg:
    396 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
    397 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
    398 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
    399 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
    400 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
    401 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
    402 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
    403 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
    404 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
    405 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
    406 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
    407 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
    408 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
    409 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
    410 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
    411 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
    412 ; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
    413 ; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
        ; Stores the <16 x i32> argument %in unchanged to %out. The EG runs
        ; expect sixteen reads (KC0[6].Y through KC0[10].X, any order); the SI
        ; and VI runs expect one s_load_dwordx16 at offset 0x19 and 0x64
        ; respectively.
    414 define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
    415 entry:
    416   store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
    417   ret void
    418 }
    419 
    420 ; FUNC-LABEL: {{^}}v16f32_arg:
    421 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
    422 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
    423 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
    424 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
    425 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
    426 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
    427 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
    428 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
    429 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
    430 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
    431 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
    432 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
    433 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
    434 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
    435 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
    436 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
    437 ; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
    438 ; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
        ; Float counterpart of v16i32_arg. Stores the <16 x float> argument %in
        ; unchanged to %out, with the same expected loads as the <16 x i32> case.
    439 define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
    440 entry:
    441   store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
    442   ret void
    443 }
    444 
    445 ; FUNC-LABEL: {{^}}kernel_arg_i64:
    446 ; GCN: s_load_dwordx2
    447 ; GCN: s_load_dwordx2
    448 ; GCN: buffer_store_dwordx2
        ; Stores the i64 argument %a unchanged to %out with 8-byte alignment.
        ; The GCN runs expect two s_load_dwordx2 instructions followed by a
        ; buffer_store_dwordx2. No EG-specific checks are present, so the r600
        ; runs verify only the function label. The body has no explicit entry
        ; label, unlike the other functions in this file.
    449 define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
    450   store i64 %a, i64 addrspace(1)* %out, align 8
    451   ret void
    452 }
    453 
    454 ; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
    455 ; XGCN: s_load_dwordx2
    456 ; XGCN: s_load_dwordx2
    457 ; XGCN: buffer_store_dwordx2
    458 ; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
    459 ;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
    460 ;   ret void
    461 ; }
    462