; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
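
; Check that private (scratch) accesses to 64-bit and 128-bit values are
; emitted as buffer_store/buffer_load dwordx2/dwordx4 when the alloca stays
; in private memory (-promote-alloca), and as ds_write_b64/ds_read_b64 pairs
; when promote-alloca moves the array into LDS (+promote-alloca).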

declare void @llvm.AMDGPU.barrier.local() convergent nounwind

; SI-LABEL: {{^}}private_access_f64_alloca:

; SI-ALLOCA: buffer_store_dwordx2
; SI-ALLOCA: buffer_load_dwordx2

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load double, double addrspace(1)* %in, align 8
  %array = alloca double, i32 16, align 8
  %ptr = getelementptr double, double* %array, i32 %b
  store double %val, double* %ptr, align 8
  call void @llvm.AMDGPU.barrier.local() convergent nounwind
  %result = load double, double* %ptr, align 8
  store double %result, double addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: {{^}}private_access_v2f64_alloca:

; SI-ALLOCA: buffer_store_dwordx4
; SI-ALLOCA: buffer_load_dwordx4

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
  %array = alloca <2 x double>, i32 16, align 16
  %ptr = getelementptr <2 x double>, <2 x double>* %array, i32 %b
  store <2 x double> %val, <2 x double>* %ptr, align 16
  call void @llvm.AMDGPU.barrier.local() convergent nounwind
  %result = load <2 x double>, <2 x double>* %ptr, align 16
  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: {{^}}private_access_i64_alloca:

; SI-ALLOCA: buffer_store_dwordx2
; SI-ALLOCA: buffer_load_dwordx2

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load i64, i64 addrspace(1)* %in, align 8
  %array = alloca i64, i32 16, align 8
  %ptr = getelementptr i64, i64* %array, i32 %b
  store i64 %val, i64* %ptr, align 8
  call void @llvm.AMDGPU.barrier.local() convergent nounwind
  %result = load i64, i64* %ptr, align 8
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: {{^}}private_access_v2i64_alloca:

; SI-ALLOCA: buffer_store_dwordx4
; SI-ALLOCA: buffer_load_dwordx4

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
  %array = alloca <2 x i64>, i32 16, align 16
  %ptr = getelementptr <2 x i64>, <2 x i64>* %array, i32 %b
  store <2 x i64> %val, <2 x i64>* %ptr, align 16
  call void @llvm.AMDGPU.barrier.local() convergent nounwind
  %result = load <2 x i64>, <2 x i64>* %ptr, align 16
  store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
  ret void
}