Home | History | Annotate | Download | only in transcoding
      1 ;; bash$ cat device_execution_overloading.cl
      2 ;; void device_kernel_with_local_args(__local float* ptr0, __local float* ptr1) {
      3 ;;   *ptr0 = 0;
      4 ;;   *ptr1 = 1;
      5 ;; }
      6 ;;
      7 ;; void device_kernel(__global float* ptr) {
      8 ;;   *ptr = 3;
      9 ;; }
     10 ;;
     11 ;; __kernel void host_kernel(uint size, __global float* ptr) {
     12 ;;   void(^block_with_local)(__local void*, __local void*) = ^(__local void* ptr0, __local void* ptr1){
     13 ;;     device_kernel_with_local_args(ptr0, ptr1);
     14 ;;   };
     15 ;;
     16 ;;   void(^block)(void) = ^{
     17 ;;     device_kernel(ptr);
     18 ;;   };
     19 ;;
     20 ;;   uint wgSize = get_kernel_work_group_size(block_with_local);
     21 ;;   uint prefMul =  get_kernel_preferred_work_group_size_multiple(block_with_local);
     22 ;;   enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1),
     23 ;;                  0, NULL, NULL, block_with_local, size, wgSize * prefMul);
     24 ;;
     25 ;;   wgSize = get_kernel_work_group_size(block);
     26 ;;   prefMul =  get_kernel_preferred_work_group_size_multiple(block);
     27 ;;   enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1),
     28 ;;                  0, NULL, NULL, block);
     29 ;; }
     30 ;; bash$
     31 ;;$PATH_TO_GEN/bin/clang -cc1 -x cl -O0 -cl-std=CL2.0 -triple spir64-unknonw-unknown -include $PATH_TO_GEN/lib/clang/3.6.1/include/opencl-20.h -emit-llvm device_execution_overloading.cl -o device_execution_overloading.ll
     32 
     33 ;; Test that overloading of device execution built-ins is OK after translation from SPIR-V
     34 
     35 ; RUN: llvm-as %s -o %t.bc
     36 ; RUN: llvm-spirv %t.bc -o %t.spv
     37 ; RUN: llvm-spirv -r %t.spv -o %t.bc
     38 ; RUN: llvm-dis < %t.bc | FileCheck %s
     39 
     40 ; ModuleID = 'device_execution_overloading.cl'
     41 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
     42 target triple = "spir64-unknonw-unknown"
     43 
     44 %opencl.block = type opaque ; opaque block handle; returned by @spir_block_bind and passed to the block-taking builtins
     45 %struct.ndrange_t = type { i32, [3 x i64], [3 x i64], [3 x i64] } ; filled in via sret by @_Z10ndrange_1Dm, then passed byval to enqueue_kernel
     46 %opencl.queue_t = type opaque ; opaque queue handle; returned by @_Z17get_default_queuev
     47 %opencl.clk_event_t = type opaque ; opaque event type; only null event-list pointers are passed in this test
     48 
     49 ; Function Attrs: nounwind -- IR for device_kernel_with_local_args() from the .cl listing above: writes 0.0 to *ptr0 and 1.0 to *ptr1.
     50 define spir_func void @device_kernel_with_local_args(float addrspace(3)* %ptr0, float addrspace(3)* %ptr1) #0 {
     51 entry:
     52   %ptr0.addr = alloca float addrspace(3)*, align 8 ; stack slot for the incoming %ptr0 (the module was generated with -O0)
     53   %ptr1.addr = alloca float addrspace(3)*, align 8 ; stack slot for the incoming %ptr1
     54   store float addrspace(3)* %ptr0, float addrspace(3)** %ptr0.addr, align 8
     55   store float addrspace(3)* %ptr1, float addrspace(3)** %ptr1.addr, align 8
     56   %0 = load float addrspace(3)*, float addrspace(3)** %ptr0.addr, align 8
     57   store float 0.000000e+00, float addrspace(3)* %0, align 4 ; *ptr0 = 0
     58   %1 = load float addrspace(3)*, float addrspace(3)** %ptr1.addr, align 8
     59   store float 1.000000e+00, float addrspace(3)* %1, align 4 ; *ptr1 = 1
     60   ret void
     61 }
     62 
     63 ; Function Attrs: nounwind -- IR for device_kernel() from the .cl listing above: writes 3.0 to *ptr.
     64 define spir_func void @device_kernel(float addrspace(1)* %ptr) #0 {
     65 entry:
     66   %ptr.addr = alloca float addrspace(1)*, align 8 ; stack slot for the incoming %ptr
     67   store float addrspace(1)* %ptr, float addrspace(1)** %ptr.addr, align 8
     68   %0 = load float addrspace(1)*, float addrspace(1)** %ptr.addr, align 8
     69   store float 3.000000e+00, float addrspace(1)* %0, align 4 ; *ptr = 3
     70   ret void
     71 }
     72 
     73 ; CHECK: @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE
     74 ; CHECK: @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE
     75 ; CHECK: @_Z14enqueue_kernel9ocl_queue{{.*}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz
     76 
     77 ; CHECK: @_Z26get_kernel_work_group_sizeU13block_pointerFvvE
     78 ; CHECK: @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE
     79 ; CHECK: @_Z14enqueue_kernel9ocl_queue{{.*}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE
     80 
     81 ; Function Attrs: nounwind -- IR for host_kernel() from the .cl listing above: binds two blocks, queries work-group info for each, and enqueues each.
     82 define spir_kernel void @host_kernel(i32 %size, float addrspace(1)* %ptr) #0 {
     83 entry:
     84   %size.addr = alloca i32, align 4
     85   %ptr.addr = alloca float addrspace(1)*, align 8
     86   %block_with_local = alloca %opencl.block*, align 8 ; handle of the block taking two addrspace(3) pointer arguments
     87   %block = alloca %opencl.block*, align 8 ; handle of the argument-less block that captures %ptr
     88   %captured = alloca <{ float addrspace(1)* }>, align 8 ; capture record for the second block (a single pointer field)
     89   %wgSize = alloca i32, align 4
     90   %prefMul = alloca i32, align 4
     91   %agg.tmp = alloca %struct.ndrange_t, align 8 ; ndrange used by the first enqueue
     92   %agg.tmp8 = alloca %struct.ndrange_t, align 8 ; ndrange used by the second enqueue
     93   store i32 %size, i32* %size.addr, align 4
     94   store float addrspace(1)* %ptr, float addrspace(1)** %ptr.addr, align 8
     95   %0 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i32 0, i32 0, i8* null) ; first block: no capture record (null); the two i32 0 are presumably its size/alignment -- TODO confirm
     96   store %opencl.block* %0, %opencl.block** %block_with_local, align 8
     97   %block.captured = getelementptr inbounds <{float addrspace(1)* }>, <{ float addrspace(1)* }>* %captured, i32 0, i32 0 ; address of field 0 of the capture record
     98   %1 = load float addrspace(1)*, float addrspace(1)** %ptr.addr, align 8
     99   store float addrspace(1)* %1, float addrspace(1)** %block.captured, align 8 ; copy %ptr into the capture record
    100   %2 = bitcast <{ float addrspace(1)* }>* %captured to i8*
    101   %3 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @__host_kernel_block_invoke_2 to i8*), i32 8, i32 8, i8* %2) ; second block: passes the capture record; the two i32 8 are presumably its size/alignment -- TODO confirm
    102   store %opencl.block* %3, %opencl.block** %block, align 8
    103   %4 = load %opencl.block*, %opencl.block** %block_with_local, align 8
    104   %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block* %4) ; overload selected for the block with local-pointer arguments
    105   store i32 %call, i32* %wgSize, align 4
    106   %5 = load %opencl.block*, %opencl.block** %block_with_local, align 8
    107   %call2 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block* %5)
    108   store i32 %call2, i32* %prefMul, align 4
    109   %call3 = call spir_func %opencl.queue_t* @_Z17get_default_queuev()
    110   call spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret %agg.tmp, i64 1) ; ndrange_1D(1), result written into %agg.tmp
    111   %6 = load %opencl.block*, %opencl.block** %block_with_local, align 8
    112   %7 = load i32, i32* %size.addr, align 4
    113   %8 = load i32, i32* %wgSize, align 4
    114   %9 = load i32, i32* %prefMul, align 4
    115   %mul = mul i32 %8, %9 ; wgSize * prefMul, passed below as the trailing variadic argument
    116   %call4 = call spir_func i32 (%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...)* @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t* %call3, i32 241, %struct.ndrange_t* byval %agg.tmp, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %6, i32 %7, i32 %mul) ; i32 241 sits where the .cl source passes CLK_ENQUEUE_FLAGS_WAIT_KERNEL; the result %call4 is unused
    117   %10 = load %opencl.block*, %opencl.block** %block, align 8
    118   %call5 = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block* %10) ; overload selected for the argument-less block
    119   store i32 %call5, i32* %wgSize, align 4
    120   %11 = load %opencl.block*, %opencl.block** %block, align 8
    121   %call6 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block* %11)
    122   store i32 %call6, i32* %prefMul, align 4
    123   %call7 = call spir_func %opencl.queue_t* @_Z17get_default_queuev()
    124   call spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret %agg.tmp8, i64 1) ; ndrange_1D(1), result written into %agg.tmp8
    125   %12 = load %opencl.block*, %opencl.block** %block, align 8
    126   %call9 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t* %call7, i32 241, %struct.ndrange_t* byval %agg.tmp8, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %12) ; non-variadic overload for the argument-less block; the result %call9 is unused
    127   ret void
    128 }
    129 
    130 ; Function Attrs: nounwind -- invoke function bound to the first block in @host_kernel: forwards its two local pointers to @device_kernel_with_local_args.
    131 define internal spir_func void @__host_kernel_block_invoke(i8* %.block_descriptor, i8 addrspace(3)* %ptr0, i8 addrspace(3)* %ptr1) #0 {
    132 entry:
    133   %.block_descriptor.addr = alloca i8*, align 8
    134   %ptr0.addr = alloca i8 addrspace(3)*, align 8
    135   %ptr1.addr = alloca i8 addrspace(3)*, align 8
    136   %block.addr = alloca <{}>*, align 8 ; typed view of the descriptor; the record type is empty because this block captures nothing
    137   store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8
    138   %0 = load i8*, i8** %.block_descriptor.addr ; loaded value is never used in this function
    139   store i8 addrspace(3)* %ptr0, i8 addrspace(3)** %ptr0.addr, align 8
    140   store i8 addrspace(3)* %ptr1, i8 addrspace(3)** %ptr1.addr, align 8
    141   %block = bitcast i8* %.block_descriptor to <{}>*
    142   store <{}>* %block, <{}>** %block.addr, align 8
    143   %1 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr0.addr, align 8
    144   %2 = bitcast i8 addrspace(3)* %1 to float addrspace(3)* ; the block takes void pointers, the callee takes float pointers
    145   %3 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr1.addr, align 8
    146   %4 = bitcast i8 addrspace(3)* %3 to float addrspace(3)*
    147   call spir_func void @device_kernel_with_local_args(float addrspace(3)* %2, float addrspace(3)* %4)
    148   ret void
    149 }
    150 
    151 declare %opencl.block* @spir_block_bind(i8*, i32, i32, i8*) ; called from @host_kernel with (invoke function as i8*, two i32 values, capture record pointer or null); declared without spir_func or an attribute group
    152 
    153 ; Function Attrs: nounwind -- invoke function bound to the second block in @host_kernel: reads the captured pointer and passes it to @device_kernel.
    154 define internal spir_func void @__host_kernel_block_invoke_2(i8* %.block_descriptor) #0 {
    155 entry:
    156   %.block_descriptor.addr = alloca i8*, align 8
    157   %block.addr = alloca <{ float addrspace(1)* }>*, align 8 ; typed view of the descriptor as the one-pointer capture record
    158   store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8
    159   %0 = load i8*, i8** %.block_descriptor.addr ; loaded value is never used in this function
    160   %block = bitcast i8* %.block_descriptor to <{ float addrspace(1)* }>*
    161   store <{ float addrspace(1)* }>* %block, <{ float addrspace(1)* }>** %block.addr, align 8
    162   %block.capture.addr = getelementptr inbounds <{ float addrspace(1)* }>, <{ float addrspace(1)* }>* %block, i32 0, i32 0 ; address of field 0: the captured pointer
    163   %1 = load float addrspace(1)*, float addrspace(1)** %block.capture.addr, align 8
    164   call spir_func void @device_kernel(float addrspace(1)* %1)
    165   ret void
    166 }
    167 
    168 declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block*) #1 ; overload called with %block_with_local in @host_kernel
    169 
    170 declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block*) #1 ; overload called with %block_with_local in @host_kernel
    171 
    172 declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) #1 ; variadic overload used for the first enqueue in @host_kernel
    173 
    174 declare spir_func %opencl.queue_t* @_Z17get_default_queuev() #1
    175 
    176 declare spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret, i64) #1 ; returns the ndrange through its sret pointer argument
    177 
    178 declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*) #1 ; overload called with %block in @host_kernel
    179 
    180 declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*) #1 ; overload called with %block in @host_kernel
    181 
    182 declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*) #1 ; non-variadic overload used for the second enqueue in @host_kernel
    183 
    184 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ; group attached to every function defined in this file
    185 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ; group attached to the spir_func declarations; same as #0 without nounwind
    186 
    187 !opencl.kernels = !{!0} ; single kernel entry: @host_kernel
    188 !opencl.enable.FP_CONTRACT = !{} ; empty
    189 !opencl.spir.version = !{!6} ; {1, 2}
    190 !opencl.ocl.version = !{!7} ; {2, 0}, consistent with -cl-std=CL2.0 in the clang command above
    191 !opencl.used.extensions = !{!8} ; empty list
    192 !opencl.used.optional.core.features = !{!8} ; empty list
    193 !opencl.compiler.options = !{!8} ; empty list
    194 
    195 !0 = !{void (i32, float addrspace(1)*)* @host_kernel, !1, !2, !3, !4, !5} ; kernel function followed by its argument-info nodes
    196 !1 = !{!"kernel_arg_addr_space", i32 0, i32 1} ; one entry per kernel argument: 0 for i32 %size, 1 for the addrspace(1) pointer %ptr
    197 !2 = !{!"kernel_arg_access_qual", !"none", !"none"}
    198 !3 = !{!"kernel_arg_type", !"uint", !"float*"} ; source-level types of (size, ptr)
    199 !4 = !{!"kernel_arg_base_type", !"uint", !"float*"}
    200 !5 = !{!"kernel_arg_type_qual", !"", !""} ; no type qualifiers on either argument
    201 !6 = !{i32 1, i32 2}
    202 !7 = !{i32 2, i32 0}
    203 !8 = !{}
    204