; Codegen test for AMDGPU d16 (half-precision) image load/store intrinsics.
      1 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
      2 ; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
      3 ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
      4 
      5 ; GCN-LABEL: {{^}}image_load_f16:
      6 ; GCN: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
      7 define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
      8 main_body:
      9   %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
     10   ret half %tex
     11 }
     12 
     13 ; GCN-LABEL: {{^}}image_load_v2f16:
     14 ; UNPACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
     15 ; PACKED: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
     16 define amdgpu_ps float @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
     17 main_body:
     18   %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
     19   %r = bitcast <2 x half> %tex to float
     20   ret float %r
     21 }
     22 
     23 ; GCN-LABEL: {{^}}image_load_v4f16:
     24 ; UNPACKED: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
     25 ; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
     26 define amdgpu_ps <2 x float> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
     27 main_body:
     28   %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
     29   %r = bitcast <4 x half> %tex to <2 x float>
     30   ret <2 x float> %r
     31 }
     32 
     33 ; GCN-LABEL: {{^}}image_load_mip_v4f16:
     34 ; UNPACKED: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm d16{{$}}
     35 ; PACKED: image_load_mip v[0:1], v[0:3], s[0:7] dmask:0xf unorm d16{{$}}
     36 define amdgpu_ps <2 x float> @image_load_mip_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
     37 main_body:
     38   %tex = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
     39   %r = bitcast <4 x half> %tex to <2 x float>
     40   ret <2 x float> %r
     41 }
     42 
     43 ; GCN-LABEL: {{^}}image_load_3d_v2f16:
     44 ; UNPACKED: image_load v[0:1], v[0:3], s[0:7] dmask:0x3 unorm d16{{$}}
     45 ; PACKED: image_load v0, v[0:3], s[0:7] dmask:0x3 unorm d16{{$}}
     46 define amdgpu_ps float @image_load_3d_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
     47 main_body:
     48   %tex = call <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32 3, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
     49   %x = bitcast <2 x half> %tex to float
     50   ret float %x
     51 }
     52 
     53 ; GCN-LABEL: {{^}}image_store_f16
     54 ; GCN: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
     55 define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
     56 main_body:
     57   call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
     58   ret void
     59 }
     60 
     61 ; GCN-LABEL: {{^}}image_store_v2f16
     62 ; UNPACKED: v_lshrrev_b32_e32
     63 ; UNPACKED: v_and_b32_e32
     64 ; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
     65 ; PACKED: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
     66 define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, float %in) {
     67 main_body:
     68   %data = bitcast float %in to <2 x half>
     69   call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %data, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
     70   ret void
     71 }
     72 
     73 ; GCN-LABEL: {{^}}image_store_v4f16
     74 ; UNPACKED: v_lshrrev_b32_e32
     75 ; UNPACKED: v_and_b32_e32
     76 ; UNPACKED: v_lshrrev_b32_e32
     77 ; UNPACKED: v_and_b32_e32
     78 ; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
     79 ; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
     80 define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
     81 main_body:
     82   %data = bitcast <2 x float> %in to <4 x half>
     83   call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %data, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
     84   ret void
     85 }
     86 
     87 ; GCN-LABEL: {{^}}image_store_mip_1d_v4f16
     88 ; UNPACKED: v_lshrrev_b32_e32
     89 ; UNPACKED: v_and_b32_e32
     90 ; UNPACKED: v_lshrrev_b32_e32
     91 ; UNPACKED: v_and_b32_e32
     92 ; UNPACKED: image_store_mip v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
     93 ; PACKED: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
     94 define amdgpu_ps void @image_store_mip_1d_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %mip, <2 x float> %in) {
     95 main_body:
     96   %data = bitcast <2 x float> %in to <4 x half>
     97   call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half> %data, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
     98   ret void
     99 }
    100 
    101 declare half @llvm.amdgcn.image.load.2d.f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
    102 declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
    103 declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
    104 declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    105 declare <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
    106 
    107 declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32, i32, i32, <8 x i32>, i32, i32) #0
    108 declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
    109 declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
    110 declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
    111 declare void @llvm.amdgcn.image.store.3d.v2f16.i32(<2 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
    112 
    113 attributes #0 = { nounwind }
    114 attributes #1 = { nounwind readonly }
    115 attributes #2 = { nounwind readnone }
    116