; AMDGPU codegen tests: loads and stores through addrspace(4) pointers, checked for flat_* instructions.
      1 ; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
      2 ; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
      3 ; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
      4 ; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
      5 
; Run at -O0 so that any optimization added later that specializes away
; generic pointer accesses cannot remove the accesses these tests look for.
      8 
      9 
     10 ; These testcases might become useless when there are optimizations to
     11 ; remove generic pointers.
     12 
     13 ; CHECK-LABEL: {{^}}store_flat_i32:
     14 ; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
     15 ; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]],
     16 ; CHECK: s_waitcnt lgkmcnt(0)
     17 ; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
     18 ; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
     19 ; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
     20 ; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
     21 define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
     22   %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
     23   store i32 %x, i32 addrspace(4)* %fptr, align 4
     24   ret void
     25 }
     26 
     27 ; CHECK-LABEL: {{^}}store_flat_i64:
     28 ; CHECK: flat_store_dwordx2
     29 define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
     30   %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
     31   store i64 %x, i64 addrspace(4)* %fptr, align 8
     32   ret void
     33 }
     34 
     35 ; CHECK-LABEL: {{^}}store_flat_v4i32:
     36 ; CHECK: flat_store_dwordx4
     37 define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
     38   %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
     39   store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
     40   ret void
     41 }
     42 
     43 ; CHECK-LABEL: {{^}}store_flat_trunc_i16:
     44 ; CHECK: flat_store_short
     45 define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
     46   %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
     47   %y = trunc i32 %x to i16
     48   store i16 %y, i16 addrspace(4)* %fptr, align 2
     49   ret void
     50 }
     51 
     52 ; CHECK-LABEL: {{^}}store_flat_trunc_i8:
     53 ; CHECK: flat_store_byte
     54 define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
     55   %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
     56   %y = trunc i32 %x to i8
     57   store i8 %y, i8 addrspace(4)* %fptr, align 2
     58   ret void
     59 }
     60 
     61 
     62 
     63 ; CHECK-LABEL: load_flat_i32:
     64 ; CHECK: flat_load_dword
     65 define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
     66   %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
     67   %fload = load i32, i32 addrspace(4)* %fptr, align 4
     68   store i32 %fload, i32 addrspace(1)* %out, align 4
     69   ret void
     70 }
     71 
     72 ; CHECK-LABEL: load_flat_i64:
     73 ; CHECK: flat_load_dwordx2
     74 define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
     75   %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
     76   %fload = load i64, i64 addrspace(4)* %fptr, align 4
     77   store i64 %fload, i64 addrspace(1)* %out, align 8
     78   ret void
     79 }
     80 
     81 ; CHECK-LABEL: load_flat_v4i32:
     82 ; CHECK: flat_load_dwordx4
     83 define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
     84   %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
     85   %fload = load <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 4
     86   store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
     87   ret void
     88 }
     89 
     90 ; CHECK-LABEL: sextload_flat_i8:
     91 ; CHECK: flat_load_sbyte
     92 define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
     93   %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
     94   %fload = load i8, i8 addrspace(4)* %fptr, align 4
     95   %ext = sext i8 %fload to i32
     96   store i32 %ext, i32 addrspace(1)* %out, align 4
     97   ret void
     98 }
     99 
    100 ; CHECK-LABEL: zextload_flat_i8:
    101 ; CHECK: flat_load_ubyte
    102 define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
    103   %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
    104   %fload = load i8, i8 addrspace(4)* %fptr, align 4
    105   %ext = zext i8 %fload to i32
    106   store i32 %ext, i32 addrspace(1)* %out, align 4
    107   ret void
    108 }
    109 
    110 ; CHECK-LABEL: sextload_flat_i16:
    111 ; CHECK: flat_load_sshort
    112 define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
    113   %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
    114   %fload = load i16, i16 addrspace(4)* %fptr, align 4
    115   %ext = sext i16 %fload to i32
    116   store i32 %ext, i32 addrspace(1)* %out, align 4
    117   ret void
    118 }
    119 
    120 ; CHECK-LABEL: zextload_flat_i16:
    121 ; CHECK: flat_load_ushort
    122 define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
    123   %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
    124   %fload = load i16, i16 addrspace(4)* %fptr, align 4
    125   %ext = zext i16 %fload to i32
    126   store i32 %ext, i32 addrspace(1)* %out, align 4
    127   ret void
    128 }
    129 
    130 declare void @llvm.AMDGPU.barrier.local() #1
    131 declare i32 @llvm.r600.read.tidig.x() #3
    132 
    133 attributes #0 = { nounwind }
    134 attributes #1 = { nounwind convergent }
    135 attributes #3 = { nounwind readnone }
    136