Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
      2 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
      4 
      5 ; GCN-LABEL: {{^}}load_1d:
      6 ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
      7 define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
      8 main_body:
      9   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
     10   ret <4 x float> %v
     11 }
     12 
     13 ; GCN-LABEL: {{^}}load_2d:
     14 ; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
     15 define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
     16 main_body:
     17   %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
     18   ret <4 x float> %v
     19 }
     20 
     21 ; GCN-LABEL: {{^}}load_3d:
     22 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
     23 define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
     24 main_body:
     25   %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
     26   ret <4 x float> %v
     27 }
     28 
     29 ; GCN-LABEL: {{^}}load_cube:
     30 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
     31 define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
     32 main_body:
     33   %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
     34   ret <4 x float> %v
     35 }
     36 
     37 ; GCN-LABEL: {{^}}load_1darray:
     38 ; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
     39 define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
     40 main_body:
     41   %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
     42   ret <4 x float> %v
     43 }
     44 
     45 ; GCN-LABEL: {{^}}load_2darray:
     46 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
     47 define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
     48 main_body:
     49   %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
     50   ret <4 x float> %v
     51 }
     52 
     53 ; GCN-LABEL: {{^}}load_2dmsaa:
     54 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
     55 define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
     56 main_body:
     57   %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
     58   ret <4 x float> %v
     59 }
     60 
     61 ; GCN-LABEL: {{^}}load_2darraymsaa:
     62 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
     63 define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
     64 main_body:
     65   %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
     66   ret <4 x float> %v
     67 }
     68 
     69 ; GCN-LABEL: {{^}}load_mip_1d:
     70 ; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
     71 define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
     72 main_body:
     73   %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
     74   ret <4 x float> %v
     75 }
     76 
     77 ; GCN-LABEL: {{^}}load_mip_2d:
     78 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
     79 define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
     80 main_body:
     81   %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
     82   ret <4 x float> %v
     83 }
     84 
     85 ; GCN-LABEL: {{^}}load_mip_3d:
     86 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
     87 define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
     88 main_body:
     89   %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
     90   ret <4 x float> %v
     91 }
     92 
     93 ; GCN-LABEL: {{^}}load_mip_cube:
     94 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
     95 define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
     96 main_body:
     97   %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
     98   ret <4 x float> %v
     99 }
    100 
    101 ; GCN-LABEL: {{^}}load_mip_1darray:
    102 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
    103 define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
    104 main_body:
    105   %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    106   ret <4 x float> %v
    107 }
    108 
    109 ; GCN-LABEL: {{^}}load_mip_2darray:
    110 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
    111 define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
    112 main_body:
    113   %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    114   ret <4 x float> %v
    115 }
    116 
    117 ; GCN-LABEL: {{^}}store_1d:
    118 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
    119 define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
    120 main_body:
    121   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
    122   ret void
    123 }
    124 
    125 ; GCN-LABEL: {{^}}store_2d:
    126 ; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
    127 define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
    128 main_body:
    129   call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
    130   ret void
    131 }
    132 
    133 ; GCN-LABEL: {{^}}store_3d:
    134 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
    135 define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
    136 main_body:
    137   call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
    138   ret void
    139 }
    140 
    141 ; GCN-LABEL: {{^}}store_cube:
    142 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
    143 define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
    144 main_body:
    145   call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
    146   ret void
    147 }
    148 
    149 ; GCN-LABEL: {{^}}store_1darray:
    150 ; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
    151 define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
    152 main_body:
    153   call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
    154   ret void
    155 }
    156 
    157 ; GCN-LABEL: {{^}}store_2darray:
    158 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
    159 define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
    160 main_body:
    161   call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
    162   ret void
    163 }
    164 
    165 ; GCN-LABEL: {{^}}store_2dmsaa:
    166 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
    167 define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
    168 main_body:
    169   call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
    170   ret void
    171 }
    172 
    173 ; GCN-LABEL: {{^}}store_2darraymsaa:
    174 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
    175 define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
    176 main_body:
    177   call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
    178   ret void
    179 }
    180 
    181 ; GCN-LABEL: {{^}}store_mip_1d:
    182 ; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
    183 define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) {
    184 main_body:
    185   call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    186   ret void
    187 }
    188 
    189 ; GCN-LABEL: {{^}}store_mip_2d:
    190 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
    191 define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) {
    192 main_body:
    193   call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    194   ret void
    195 }
    196 
    197 ; GCN-LABEL: {{^}}store_mip_3d:
    198 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
    199 define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) {
    200 main_body:
    201   call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    202   ret void
    203 }
    204 
    205 ; GCN-LABEL: {{^}}store_mip_cube:
    206 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
    207 define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
    208 main_body:
    209   call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    210   ret void
    211 }
    212 
    213 ; GCN-LABEL: {{^}}store_mip_1darray:
    214 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
    215 define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) {
    216 main_body:
    217   call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    218   ret void
    219 }
    220 
    221 ; GCN-LABEL: {{^}}store_mip_2darray:
    222 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
    223 define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
    224 main_body:
    225   call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    226   ret void
    227 }
    228 
    229 ; GCN-LABEL: {{^}}getresinfo_1d:
    230 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
    231 define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) {
    232 main_body:
    233   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    234   ret <4 x float> %v
    235 }
    236 
    237 ; GCN-LABEL: {{^}}getresinfo_2d:
    238 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
    239 define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) {
    240 main_body:
    241   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    242   ret <4 x float> %v
    243 }
    244 
    245 ; GCN-LABEL: {{^}}getresinfo_3d:
    246 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
    247 define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) {
    248 main_body:
    249   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    250   ret <4 x float> %v
    251 }
    252 
    253 ; GCN-LABEL: {{^}}getresinfo_cube:
    254 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
    255 define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) {
    256 main_body:
    257   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    258   ret <4 x float> %v
    259 }
    260 
    261 ; GCN-LABEL: {{^}}getresinfo_1darray:
    262 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
    263 define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) {
    264 main_body:
    265   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    266   ret <4 x float> %v
    267 }
    268 
    269 ; GCN-LABEL: {{^}}getresinfo_2darray:
    270 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
    271 define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) {
    272 main_body:
    273   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    274   ret <4 x float> %v
    275 }
    276 
    277 ; GCN-LABEL: {{^}}getresinfo_2dmsaa:
    278 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
    279 define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) {
    280 main_body:
    281   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    282   ret <4 x float> %v
    283 }
    284 
    285 ; GCN-LABEL: {{^}}getresinfo_2darraymsaa:
    286 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
    287 define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) {
    288 main_body:
    289   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    290   ret <4 x float> %v
    291 }
    292 
    293 ; GCN-LABEL: {{^}}load_1d_V1:
    294 ; GCN: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
    295 define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
    296 main_body:
    297   %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
    298   ret float %v
    299 }
    300 
    301 ; GCN-LABEL: {{^}}load_1d_V2:
    302 ; GCN: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
    303 define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
    304 main_body:
    305   %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
    306   ret <2 x float> %v
    307 }
    308 
    309 ; GCN-LABEL: {{^}}store_1d_V1:
    310 ; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
    311 define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
    312 main_body:
    313   call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
    314   ret void
    315 }
    316 
    317 ; GCN-LABEL: {{^}}store_1d_V2:
    318 ; GCN: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
    319 define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
    320 main_body:
    321   call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
    322   ret void
    323 }
    324 
    325 ; GCN-LABEL: {{^}}load_1d_glc:
    326 ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
    327 define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
    328 main_body:
    329   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
    330   ret <4 x float> %v
    331 }
    332 
    333 ; GCN-LABEL: {{^}}load_1d_slc:
    334 ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
    335 define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
    336 main_body:
    337   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
    338   ret <4 x float> %v
    339 }
    340 
    341 ; GCN-LABEL: {{^}}load_1d_glc_slc:
    342 ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
    343 define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
    344 main_body:
    345   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
    346   ret <4 x float> %v
    347 }
    348 
    349 ; GCN-LABEL: {{^}}store_1d_glc:
    350 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
    351 define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
    352 main_body:
    353   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
    354   ret void
    355 }
    356 
    357 ; GCN-LABEL: {{^}}store_1d_slc:
    358 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
    359 define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
    360 main_body:
    361   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
    362   ret void
    363 }
    364 
    365 ; GCN-LABEL: {{^}}store_1d_glc_slc:
    366 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
    367 define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
    368 main_body:
    369   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
    370   ret void
    371 }
    372 
    373 ; GCN-LABEL: {{^}}getresinfo_dmask0:
    374 ; GCN-NOT: image
    375 ; GCN: ; return to shader part epilog
    376 define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
    377 main_body:
    378   %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
    379   ret <4 x float> %r
    380 }
    381 
    382 ; Ideally, the register allocator would avoid the wait here
    383 ;
    384 ; GCN-LABEL: {{^}}image_store_wait:
    385 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
    386 ; SI: s_waitcnt expcnt(0)
    387 ; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
    388 ; GCN: s_waitcnt vmcnt(0)
    389 ; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
    390 define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
    391 main_body:
    392   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
    393   %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
    394   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
    395   ret void
    396 }
    397 
    398 ; SI won't merge ds memory operations, because of the signed offset bug, so
    399 ; we only have check lines for VI.
    400 ; VI-LABEL: image_load_mmo
    401 ; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
    402 ; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
    403 define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
    404   store float 0.000000e+00, float addrspace(3)* %lds
    405   %c0 = extractelement <2 x i32> %c, i32 0
    406   %c1 = extractelement <2 x i32> %c, i32 1
    407   %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 15, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
    408   %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
    409   store float 0.000000e+00, float addrspace(3)* %tmp2
    410   ret float %tex
    411 }
    412 
    413 declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
    414 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
    415 declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    416 declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    417 declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
    418 declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    419 declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    420 declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    421 
    422 declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
    423 declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    424 declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    425 declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    426 declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    427 declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    428 
    429 declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
    430 declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
    431 declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    432 declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    433 declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
    434 declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    435 declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    436 declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    437 
    438 declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
    439 declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    440 declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    441 declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    442 declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    443 declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    444 
    445 declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
    446 declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
    447 declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
    448 declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
    449 declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
    450 declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
    451 declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
    452 declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
    453 
    454 declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
    455 declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
    456 declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
    457 declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
    458 declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0
    459 
    460 attributes #0 = { nounwind }
    461 attributes #1 = { nounwind readonly }
    462 attributes #2 = { nounwind readnone }
    463