Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA %s
      2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
      3 
      4 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
      5 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
      6 
      7 ; HSA: kernarg_segment_byte_size = 0
      8 ; MESA: kernarg_segment_byte_size = 16
      9 
     10 ; HSA: s_load_dword s0, s[4:5], 0x0
     11 define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
          ; Kernel with no explicit arguments, using attribute group #0 (which
          ; does not set "amdgpu-implicitarg-num-bytes").
          ; It fetches the implicit-argument pointer from the intrinsic, views it
          ; as an i32 pointer in address space 4, and performs one volatile i32
          ; load through it. %load is never used; the load is volatile so that
          ; it still appears in the output for the checks above to match.
     12   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
     13   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
     14   %load = load volatile i32, i32 addrspace(4)* %cast
     15   ret void
     16 }
     17 
     18 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
     19 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
     20 
     21 ; HSA: kernarg_segment_byte_size = 48
     22 ; MESA: kernarg_segment_byte_size = 16
     23 
     24 ; HSA: s_load_dword s0, s[4:5], 0x0
     25 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
          ; Same body as @kernel_implicitarg_ptr_empty, but with attribute group
          ; #1, which sets "amdgpu-implicitarg-num-bytes"="48". The kernel has no
          ; explicit arguments; the HSA check above expects a kernarg segment
          ; size of 48 for it.
          ; The volatile load through the implicit-argument pointer is unused and
          ; exists only so the checks have an instruction to match.
     26   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
     27   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
     28   %load = load volatile i32, i32 addrspace(4)* %cast
     29   ret void
     30 }
     31 
     32 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
     33 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
     34 
     35 ; HSA: kernarg_segment_byte_size = 112
     36 ; MESA: kernarg_segment_byte_size = 128
     37 
     38 ; HSA: s_load_dword s0, s[4:5], 0x1c
     39 define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
          ; Kernel taking one unnamed [112 x i8] argument, i.e. 112 bytes of
          ; explicit kernel arguments, with attribute group #0.
          ; It loads one volatile i32 through the implicit-argument pointer; the
          ; result is unused.
          ; NOTE(review): the HSA check above expects the load at offset 0x1c
          ; (28); 28 * 4 = 112, so this is presumably a dword-scaled offset that
          ; points just past the explicit argument -- confirm.
     40   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
     41   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
     42   %load = load volatile i32, i32 addrspace(4)* %cast
     43   ret void
     44 }
     45 
     46 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
     47 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
     48 
     49 ; HSA: kernarg_segment_byte_size = 160
     50 ; MESA: kernarg_segment_byte_size = 128
     51 
     52 ; HSA: s_load_dword s0, s[4:5], 0x1c
     53 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
          ; Same body and 112-byte unnamed argument as @kernel_implicitarg_ptr,
          ; but with attribute group #1 ("amdgpu-implicitarg-num-bytes"="48").
          ; The HSA check above expects a kernarg segment size of 160, which
          ; equals 112 + 48.
          ; The volatile i32 load through the implicit-argument pointer is
          ; unused and serves only as the instruction matched by the checks.
     54   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
     55   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
     56   %load = load volatile i32, i32 addrspace(4)* %cast
     57   ret void
     58 }
     59 
     60 ; GCN-LABEL: {{^}}func_implicitarg_ptr:
     61 ; GCN: s_waitcnt
     62 ; MESA: s_mov_b64 s[8:9], s[6:7]
     63 ; MESA: s_mov_b32 s11, 0xf000
     64 ; MESA: s_mov_b32 s10, -1
     65 ; MESA: buffer_load_dword v0, off, s[8:11], 0
     66 ; HSA: v_mov_b32_e32 v0, s6
     67 ; HSA: v_mov_b32_e32 v1, s7
     68 ; HSA: flat_load_dword v0, v[0:1]
     69 ; GCN-NEXT: s_waitcnt
     70 ; GCN-NEXT: s_setpc_b64
     71 define void @func_implicitarg_ptr() #0 {
          ; Ordinary (non-amdgpu_kernel) function that reads through the
          ; implicit-argument pointer. It is the callee used by the *_call_*
          ; kernels and functions in this file.
          ; The checks above expect the load address to come from s[6:7] on both
          ; targets (copied into a buffer descriptor for MESA, moved to v[0:1]
          ; for HSA). The loaded value is unused; the load is volatile.
     72   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
     73   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
     74   %load = load volatile i32, i32 addrspace(4)* %cast
     75   ret void
     76 }
     77 
     78 ; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
     79 ; GCN: s_waitcnt
     80 ; MESA: s_mov_b64 s[8:9], s[6:7]
     81 ; MESA: s_mov_b32 s11, 0xf000
     82 ; MESA: s_mov_b32 s10, -1
     83 ; MESA: buffer_load_dword v0, off, s[8:11], 0
     84 ; HSA: v_mov_b32_e32 v0, s6
     85 ; HSA: v_mov_b32_e32 v1, s7
     86 ; HSA: flat_load_dword v0, v[0:1]
     87 ; GCN-NEXT: s_waitcnt
     88 ; GCN-NEXT: s_setpc_b64
     89 define void @opencl_func_implicitarg_ptr() #0 {
          ; Non-kernel function with the same body as @func_implicitarg_ptr: one
          ; unused volatile i32 load through the implicit-argument pointer.
          ; NOTE(review): despite the opencl_ prefix this uses attribute group
          ; #0, not #1, so the IR is identical to @func_implicitarg_ptr apart
          ; from the name -- confirm whether #1 was intended.
     90   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
     91   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
     92   %load = load volatile i32, i32 addrspace(4)* %cast
     93   ret void
     94 }
     95 
     96 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
     97 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
     98 ; HSA: kernarg_segment_byte_size = 0
     99 ; MESA: kernarg_segment_byte_size = 16
    100 ; GCN: s_mov_b64 s[6:7], s[4:5]
    101 ; GCN: s_swappc_b64
    102 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
          ; Kernel with no explicit arguments whose only work is calling
          ; @func_implicitarg_ptr, which reads through the implicit-argument
          ; pointer.
          ; The checks above expect s[4:5] to be copied into s[6:7] before the
          ; s_swappc_b64 call.
    103   call void @func_implicitarg_ptr()
    104   ret void
    105 }
    106 
    107 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
    108 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
    109 ; HSA: kernarg_segment_byte_size = 48
    110 ; MESA: kernarg_segment_byte_size = 16
    111 ; GCN: s_mov_b64 s[6:7], s[4:5]
    112 ; GCN: s_swappc_b64
    113 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
          ; Argument-less kernel with attribute group #1
          ; ("amdgpu-implicitarg-num-bytes"="48") that only calls
          ; @func_implicitarg_ptr.
          ; NOTE(review): the callee is @func_implicitarg_ptr, not
          ; @opencl_func_implicitarg_ptr -- confirm this is intentional.
    114   call void @func_implicitarg_ptr()
    115   ret void
    116 }
    117 
    118 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
    119 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
    120 ; HSA: kernarg_segment_byte_size = 112
    121 ; MESA: kernarg_segment_byte_size = 128
    122 
    123 ; HSA: s_add_u32 s6, s4, 0x70
    124 ; MESA: s_add_u32 s6, s4, 0x70
    125 
    126 ; GCN: s_addc_u32 s7, s5, 0{{$}}
    127 ; GCN: s_swappc_b64
    128 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
          ; Kernel with 112 bytes of explicit (unnamed) arguments that only calls
          ; @func_implicitarg_ptr.
          ; The checks above expect s[6:7] to be formed as s[4:5] + 0x70
          ; (0x70 = 112) with an add / add-with-carry pair before the call.
    129   call void @func_implicitarg_ptr()
    130   ret void
    131 }
    132 
    133 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
    134 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
    135 ; HSA: kernarg_segment_byte_size = 160
    136 ; MESA: kernarg_segment_byte_size = 128
    137 
    138 ; GCN: s_add_u32 s6, s4, 0x70
    139 
    140 ; GCN: s_addc_u32 s7, s5, 0{{$}}
    141 ; GCN: s_swappc_b64
    142 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
          ; Same as @kernel_call_implicitarg_ptr_func (112-byte unnamed argument,
          ; single call to @func_implicitarg_ptr) but with attribute group #1
          ; ("amdgpu-implicitarg-num-bytes"="48").
          ; The checks above expect the same s[4:5] + 0x70 computation into
          ; s[6:7], and an HSA kernarg segment size of 160 (112 + 48).
    143   call void @func_implicitarg_ptr()
    144   ret void
    145 }
    146 
    147 ; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func:
    148 ; GCN-NOT: s6
    149 ; GCN-NOT: s7
    150 ; GCN-NOT: s[6:7]
    151 define void @func_call_implicitarg_ptr_func() #0 {
          ; Non-kernel function that only calls @func_implicitarg_ptr.
          ; The GCN-NOT checks above require that s6, s7 and s[6:7] are not
          ; mentioned in the output for this function.
          ; NOTE(review): presumably this verifies that the incoming implicit-
          ; argument pointer is passed through to the callee without being
          ; copied or recomputed -- confirm.
    152   call void @func_implicitarg_ptr()
    153   ret void
    154 }
    155 
    156 ; GCN-LABEL: {{^}}opencl_func_call_implicitarg_ptr_func:
    157 ; GCN-NOT: s6
    158 ; GCN-NOT: s7
    159 ; GCN-NOT: s[6:7]
    160 define void @opencl_func_call_implicitarg_ptr_func() #0 {
          ; Non-kernel function that only calls @func_implicitarg_ptr; the
          ; GCN-NOT checks above forbid any mention of s6, s7 or s[6:7].
          ; NOTE(review): apart from its name this is identical to
          ; @func_call_implicitarg_ptr_func (attribute group #0, same callee) --
          ; confirm whether #1 or an opencl_ callee was intended.
    161   call void @func_implicitarg_ptr()
    162   ret void
    163 }
    164 
    165 ; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
    166 ; GCN: s_waitcnt
    167 ; MESA: s_mov_b64 s[12:13], s[6:7]
    168 ; MESA: s_mov_b32 s15, 0xf000
    169 ; MESA: s_mov_b32 s14, -1
    170 ; MESA: buffer_load_dword v0, off, s[12:15], 0
    171 ; HSA: v_mov_b32_e32 v0, s6
    172 ; HSA: v_mov_b32_e32 v1, s7
    173 ; HSA: flat_load_dword v0, v[0:1]
    174 ; MESA: s_mov_b32 s10, s14
    175 ; MESA: s_mov_b32 s11, s15
    176 ; MESA: buffer_load_dword v0, off, s[8:11], 0
    177 ; HSA: v_mov_b32_e32 v0, s8
    178 ; HSA: v_mov_b32_e32 v1, s9
    179 ; HSA: flat_load_dword v0, v[0:1]
    180 
    181 ; GCN: s_waitcnt vmcnt(0)
    182 define void @func_kernarg_implicitarg_ptr() #0 {
          ; Non-kernel function that reads through two different intrinsic
          ; pointers: the kernarg segment pointer and the implicit-argument
          ; pointer. Each is viewed as an i32 pointer and loaded once.
          ; %load0 reads via the kernarg segment pointer, %load1 via the
          ; implicit-argument pointer. Both loads are volatile and both results
          ; are unused.
          ; The checks above match a load addressed from s[6:7] followed by one
          ; addressed from s[8:9].
          ; NOTE(review): presumably s[6:7] carries the kernarg segment pointer
          ; and s[8:9] the implicit-argument pointer -- confirm.
    183   %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
    184   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
    185   %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
    186   %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
    187   %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
    188   %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
    189   ret void
    190 }
    191 
    192 ; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
    193 ; GCN: s_waitcnt
    194 ; MESA: s_mov_b64 s[12:13], s[6:7]
    195 ; MESA: s_mov_b32 s15, 0xf000
    196 ; MESA: s_mov_b32 s14, -1
    197 ; MESA: buffer_load_dword v0, off, s[12:15], 0
    198 ; HSA: v_mov_b32_e32 v0, s6
    199 ; HSA: v_mov_b32_e32 v1, s7
    200 ; HSA: flat_load_dword v0, v[0:1]
    201 ; MESA: s_mov_b32 s10, s14
    202 ; MESA: s_mov_b32 s11, s15
    203 ; MESA: buffer_load_dword v0, off, s[8:11], 0
    204 ; HSA: v_mov_b32_e32 v0, s8
    205 ; HSA: v_mov_b32_e32 v1, s9
    206 ; HSA: flat_load_dword v0, v[0:1]
    207 
    208 ; GCN: s_waitcnt vmcnt(0)
    209 define void @opencl_func_kernarg_implicitarg_ptr() #0 {
          ; Non-kernel function with the same body as
          ; @func_kernarg_implicitarg_ptr: one unused volatile i32 load through
          ; the kernarg segment pointer (%load0), then one through the
          ; implicit-argument pointer (%load1).
          ; It uses attribute group #0, so only the function name differs from
          ; the non-opencl variant.
    210   %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
    211   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
    212   %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
    213   %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
    214   %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
    215   %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
    216   ret void
    217 }
    218 
    219 ; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
    220 ; GCN: s_mov_b64 s[6:7], s[4:5]
    221 ; GCN: s_add_u32 s8, s6, 0x70
    222 ; GCN: s_addc_u32 s9, s7, 0
    223 ; GCN: s_swappc_b64
    224 define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
          ; Kernel with 112 bytes of explicit (unnamed) arguments that only calls
          ; @func_kernarg_implicitarg_ptr, a callee that reads through both the
          ; kernarg segment pointer and the implicit-argument pointer.
          ; The checks above expect s[4:5] to be copied to s[6:7], and s[8:9] to
          ; be formed as s[6:7] + 0x70 (0x70 = 112), before the call.
    225   call void @func_kernarg_implicitarg_ptr()
    226   ret void
    227 }
    228 
    229 ; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
    230 ; HSA: kernarg_segment_byte_size = 120
    231 ; MESA: kernarg_segment_byte_size = 84
    232 ; GCN: kernarg_segment_alignment = 6
    233 define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
          ; Kernel with two unnamed arguments, a <16 x i32> (64 bytes) followed
          ; by an i32 (4 bytes), and attribute group #1
          ; ("amdgpu-implicitarg-num-bytes"="48").
          ; It performs one unused volatile i32 load through the
          ; implicit-argument pointer.
          ; The checks above expect kernarg sizes of 120 (HSA) and 84 (MESA) and
          ; kernarg_segment_alignment = 6.
          ; NOTE(review): 120 = 72 + 48, so the 68 explicit bytes are presumably
          ; rounded up only to 8 before the implicit arguments, not to the
          ; 64-byte alignment of the vector argument -- confirm.
    234   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
    235   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
    236   %load = load volatile i32, i32 addrspace(4)* %cast
    237   ret void
    238 }
    239 
    240 declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
        ; ^ Intrinsic used by every load-based test in this file: takes no
        ;   arguments and returns an i8 pointer in address space 4.
    241 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
        ; ^ Same signature; used only by the *_kernarg_implicitarg_ptr functions.
        ;   Both declarations carry attribute group #2.
    242 
    243 attributes #0 = { nounwind noinline }
        ; ^ #0: baseline group used by the non-"opencl" kernels and by all
        ;   non-kernel functions in this file.
    244 attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
        ; ^ #1: same as #0 plus a 48-byte implicit-argument size; used by the
        ;   opencl_kernel_* kernels and
        ;   @kernel_implicitarg_no_struct_align_padding.
    245 attributes #2 = { nounwind readnone speculatable }
        ; ^ #2: attached to the two intrinsic declarations.
    246