Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
      2 
      3 declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i32, i1) #0
        ; ^ memcpy intrinsic variant taking an addrspace(1) destination and an
        ;   addrspace(4) source; called by @memcpy_constant_cast_group_gv_gep_to_flat.
      4 
        ; Undef-initialized addrspace(3) globals (a single i32 and a 256 x i32 array).
        ; They are the operands of the constant addrspacecasts in the "group"/"lds" tests.
      5 @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
      6 @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
      7 
        ; addrspace(1) counterparts, used by the "global" tests.
      8 @global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
      9 @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
     10 
     11 ; HSA: @store_cast_0_flat_to_group_addrspacecast() #1
        ; Stores 7 through a null addrspace(4) pointer that is constant-cast to
        ; addrspace(3). The FileCheck line above expects the function to stay in
        ; attribute group #1, i.e. this cast direction must not add "amdgpu-queue-ptr".
     12 define void @store_cast_0_flat_to_group_addrspacecast() #1 {
     13   store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
     14   ret void
     15 }
     16 
     17 ; HSA: @store_cast_0_group_to_flat_addrspacecast() #2
        ; Opposite direction of the previous test: a null addrspace(3) pointer is
        ; constant-cast to addrspace(4) and stored through. The input uses attribute
        ; group #1; the FileCheck line above expects the pass to move it to #2, the
        ; group that carries "amdgpu-queue-ptr".
     18 define void @store_cast_0_group_to_flat_addrspacecast() #1 {
     19   store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*)
     20   ret void
     21 }
     22 
     23 ; HSA: define void @store_constant_cast_group_gv_to_flat() #2
        ; Same as the null case, but the cast operand is the addrspace(3) global
        ; @lds.i32. Expected to end up in attribute group #2 ("amdgpu-queue-ptr").
        ; Note: unlike the neighbouring tests, the pattern above also matches the
        ; leading "define void".
     24 define void @store_constant_cast_group_gv_to_flat() #1 {
     25   store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*)
     26   ret void
     27 }
     28 
     29 ; HSA: @store_constant_cast_group_gv_gep_to_flat() #2
        ; The addrspace(3) -> addrspace(4) cast is nested inside a constant
        ; getelementptr (element 8 of @lds.arr) instead of being the pointer operand
        ; directly. Expected to end up in attribute group #2 ("amdgpu-queue-ptr").
     30 define void @store_constant_cast_group_gv_gep_to_flat() #1 {
     31   store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
     32   ret void
     33 }
     34 
     35 ; HSA: @store_constant_cast_global_gv_to_flat() #1
        ; Negative case: the cast source is the addrspace(1) global @global.i32, not
        ; an addrspace(3) one. The FileCheck line above expects the function to stay
        ; in attribute group #1 (no "amdgpu-queue-ptr").
     36 define void @store_constant_cast_global_gv_to_flat() #1 {
     37   store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*)
     38   ret void
     39 }
     40 
     41 ; HSA: @store_constant_cast_global_gv_gep_to_flat() #1
        ; Negative case with a nested cast: addrspace(1) @global.arr is cast to
        ; addrspace(4) inside a constant getelementptr (element 8). Expected to stay
        ; in attribute group #1 (no "amdgpu-queue-ptr").
     42 define void @store_constant_cast_global_gv_gep_to_flat() #1 {
     43   store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
     44   ret void
     45 }
     46 
     47 ; HSA: @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
        ; Load variant: the constant addrspace(3) -> addrspace(4) cast (inside a GEP to
        ; element 8 of @lds.arr) is the pointer operand of a load; the loaded value is
        ; written to %out. Expected to end up in attribute group #2.
     48 define void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
     49   %val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
     50   store i32 %val, i32 addrspace(1)* %out
     51   ret void
     52 }
     53 
     54 ; HSA: @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
        ; atomicrmw variant: the same constant cast expression is the pointer operand
        ; of a seq_cst "atomicrmw add ... 1"; the result is written to %out.
        ; Expected to end up in attribute group #2.
     55 define void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
     56   %val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst
     57   store i32 %val, i32 addrspace(1)* %out
     58   ret void
     59 }
     60 
     61 ; HSA: @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
        ; cmpxchg variant: the constant cast expression is the pointer operand of a
        ; seq_cst/seq_cst compare-exchange (expected 0, new value 1). Field 0 of the
        ; { i32, i1 } result is extracted and written to %out.
        ; Expected to end up in attribute group #2.
     62 define void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
     63   %val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst
     64   %val0 = extractvalue { i32, i1 } %val, 0
     65   store i32 %val0, i32 addrspace(1)* %out
     66   ret void
     67 }
     68 
     69 ; HSA: @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
        ; Call variant: the constant cast expression appears as a call argument (the
        ; addrspace(4) source pointer of the memcpy intrinsic) rather than as the
        ; operand of a memory instruction. Expected to end up in attribute group #2.
     70 define void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
     71   call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* %out, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i32 4, i1 false)
     72   ret void
     73 }
     74 
     75 ; Can't just search the pointer operand: here the cast constant is the value being stored.
     76 ; HSA: @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #2
        ; The store's pointer operand is the plain argument %out; the addrspace(3) ->
        ; addrspace(4) constant expression is the stored value. The FileCheck line
        ; above still expects attribute group #2.
     77 define void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 {
     78   store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out
     79   ret void
     80 }
     81 
     82 ; Can't just search pointer-typed operands: here the cast is wrapped in a ptrtoint, so the operand is an i64.
     83 ; HSA: @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #2
        ; The stored value has integer type (i64); the addrspace(3) -> addrspace(4)
        ; cast is only reachable by walking into the ptrtoint constant expression.
        ; The FileCheck line above still expects attribute group #2.
     84 define void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 {
     85   store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out
     86   ret void
     87 }
     88 
     89 ; Cast group (addrspace 3) to flat (addrspace 4), do a GEP, then cast the result back to group.
     90 ; HSA: @store_constant_cast_group_gv_gep_to_flat_to_group() #2
        ; The outermost constant expression yields an addrspace(3) pointer, but an
        ; addrspace(3) -> addrspace(4) cast is nested inside it. The FileCheck line
        ; above expects attribute group #2, so the inner cast must still be detected.
     91 define void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
     92   store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
     93   ret void
     94 }
     95 
     96 ; HSA: @ret_constant_cast_group_gv_gep_to_flat_to_group() #2
        ; Same group -> flat -> GEP -> group constant expression as the previous test,
        ; but used as the operand of a ret instead of a store. The function has no
        ; memory instructions at all. Expected to end up in attribute group #2.
     97 define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
     98   ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
     99 }
    100 
    101 ; HSA: attributes #0 = { argmemonly nounwind }
    102 ; HSA: attributes #1 = { nounwind }
    103 ; HSA: attributes #2 = { nounwind "amdgpu-queue-ptr" }
    104 
        ; Input attribute groups. Only #0 (the memcpy declaration) and #1 (every
        ; function definition) exist in the input; group #2 in the expected output
        ; above is not defined here and must be created by the pass under test.
    105 attributes #0 = { argmemonly nounwind }
    106 attributes #1 = { nounwind }
    107