Home | History | Annotate | Download | only in transcoding
      1 ;; bash$ cat repro.cl
      2 ;; void device_kernel(__local float* ptr0, __local float* ptr1) {
      3 ;;   *ptr0 = 0;
      4 ;;   *ptr1 = 1;
      5 ;; }
      6 ;;
      7 ;; __kernel void host_kernel(uint size) {
      8 ;;   void(^block)(__local void*, __local void*) = ^(__local void* ptr0, __local void* ptr1){
      9 ;;     device_kernel(ptr0, ptr1);
     10 ;;   };
     11 ;;
     12 ;;   uint wgSize = get_kernel_work_group_size(block);
     13 ;;   uint prefMul =  get_kernel_preferred_work_group_size_multiple(block);
     14 ;;   enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1),
     15 ;;                  0, NULL, NULL, block, size, wgSize * prefMul);
     16 ;; }
     17 ;; bash$
     18 ;; bash$ export PATH_TO_INCLUDE=$PATH_TO_GEN/lib/clang/3.6.1/include
     19 ;; bash$ $PATH_TO_GEN/bin/clang -cc1 -x cl -cl-std=CL2.0 -triple spir64-unknonw-unknown -emit-llvm  -include opencl-20.h  repro.cl -o device_execution.ll
     20 
     21 ;; 1. Check mangling of device execution built-ins for blocks with local memory arguments
     22 ;; 2. Check there is an enqueue_kernel with ellipsis
     23 
     24 ; RUN: llvm-as %s -o %t.bc
     25 ; RUN: llvm-spirv %t.bc -o %t.spv
     26 ; RUN: llvm-spirv -r %t.spv -o %t.bc
     27 ; RUN: llvm-dis < %t.bc | FileCheck %s
     28 
     29 ; ModuleID = 'repro.cl'
        ; Module-level target description and the named types used by the functions
        ; and declarations in this test.
     30 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
        ; NOTE(review): "unknonw" looks like a misspelling of "unknown"; it is the
        ; same spelling as the -triple value in the recorded clang command near the
        ; top of this file. Left untouched because the string is part of the test
        ; input -- confirm before normalizing.
     31 target triple = "spir64-unknonw-unknown"
     32 
        ; Opaque handle type for blocks; only ever used through pointers below.
     33 %opencl.block = type opaque
        ; Layout of ndrange_t as seen by this module: one i32 followed by three
        ; arrays of three i64 each.
        ; NOTE(review): presumably work dimension, then global offset / global size /
        ; local size -- the field meaning is not visible here, confirm against the
        ; OpenCL header.
     34 %struct.ndrange_t = type { i32, [3 x i64], [3 x i64], [3 x i64] }
        ; Opaque handle types for the device queue and for clk_event_t values.
     35 %opencl.queue_t = type opaque
     36 %opencl.clk_event_t = type opaque
     37 
     38 ; Function Attrs: nounwind
        ; IR for device_kernel() from the repro.cl listing at the top of this file:
        ; writes 0.0 through %ptr0 and 1.0 through %ptr1.
        ; Both parameters are float pointers in addrspace(3), which is how the
        ; __local qualifier of the listing appears in this module.
        ; Returns nothing.
     39 define spir_func void @device_kernel(float addrspace(3)* %ptr0, float addrspace(3)* %ptr1) #0 {
     40 entry:
          ; Stack slots holding copies of the two incoming pointer arguments.
     41   %ptr0.addr = alloca float addrspace(3)*, align 8
     42   %ptr1.addr = alloca float addrspace(3)*, align 8
          ; Spill the arguments into their slots before they are used.
     43   store float addrspace(3)* %ptr0, float addrspace(3)** %ptr0.addr, align 8
     44   store float addrspace(3)* %ptr1, float addrspace(3)** %ptr1.addr, align 8
          ; *ptr0 = 0
     45   %0 = load float addrspace(3)*, float addrspace(3)** %ptr0.addr, align 8
     46   store float 0.000000e+00, float addrspace(3)* %0, align 4
          ; *ptr1 = 1
     47   %1 = load float addrspace(3)*, float addrspace(3)** %ptr1.addr, align 8
     48   store float 1.000000e+00, float addrspace(3)* %1, align 4
     49   ret void
     50 }
     51 
     52 ; Function Attrs: nounwind
        ; IR for the host_kernel() kernel from the repro.cl listing at the top of
        ; this file. It binds a block to @__host_kernel_block_invoke, queries two
        ; work-group properties for that block, and enqueues the block through the
        ; variadic enqueue_kernel built-in.
        ; Parameter %size is forwarded to enqueue_kernel as its last fixed argument.
        ; The FileCheck directives placed between the instructions verify the names
        ; of the three built-in calls after the LLVM -> SPIR-V -> LLVM round trip
        ; described by the lit lines near the top of the file.
     53 define spir_kernel void @host_kernel(i32 %size) #0 {
     54 entry:
          ; Stack slots for the argument, the block handle, the two query results
          ; and the temporary ndrange_t aggregate.
     55   %size.addr = alloca i32, align 4
     56   %block = alloca %opencl.block*, align 8
     57   %wgSize = alloca i32, align 4
     58   %prefMul = alloca i32, align 4
     59   %agg.tmp = alloca %struct.ndrange_t, align 8
     60   store i32 %size, i32* %size.addr, align 4
          ; Create the block handle. The invoke function is passed as an i8*, and
          ; the remaining operands are 0, 0 and null.
          ; NOTE(review): presumably context size, context alignment and context
          ; pointer for a block with no captures -- the operand meaning is not
          ; visible in this file, confirm against the SPIR block ABI.
     61   %0 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i32 0, i32 0, i8* null)
     62   store %opencl.block* %0, %opencl.block** %block, align 8
          ; wgSize = get_kernel_work_group_size(block)
     63   %1 = load %opencl.block*, %opencl.block** %block, align 8
     64 ; CHECK: call {{.*}} @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE
     65   %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block* %1)
     66   store i32 %call, i32* %wgSize, align 4
          ; prefMul = get_kernel_preferred_work_group_size_multiple(block)
     67   %2 = load %opencl.block*, %opencl.block** %block, align 8
     68 ; CHECK: call {{.*}} @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE
     69   %call1 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block* %2)
     70   store i32 %call1, i32* %prefMul, align 4
          ; Fetch the default queue and build ndrange_1D(1) into %agg.tmp, which is
          ; passed as the sret result slot.
     71   %call2 = call spir_func %opencl.queue_t* @_Z17get_default_queuev()
     72   call spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret %agg.tmp, i64 1)
          ; Reload the operands of the enqueue call and compute wgSize * prefMul.
     73   %3 = load %opencl.block*, %opencl.block** %block, align 8
     74   %4 = load i32, i32* %size.addr, align 4
     75   %5 = load i32, i32* %wgSize, align 4
     76   %6 = load i32, i32* %prefMul, align 4
     77   %mul = mul i32 %5, %6
          ; Enqueue the block. %4 (size) fills the last fixed i32 parameter and
          ; %mul is the only argument passed through the ellipsis. The ndrange_t
          ; temporary is passed byval, and the event count / event pointers are
          ; 0 / null / null.
          ; NOTE(review): i32 241 sits in the flags position, where the listing
          ; passes CLK_ENQUEUE_FLAGS_WAIT_KERNEL; the numeric value presumably
          ; comes from opencl-20.h -- confirm.
     78 ; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}U13block_pointerFvPU3AS3vzEjz({{.*}}, %opencl.block* {{.*}}, i32 {{.*}}, i32 {{.*}})
     79   %call3 = call spir_func i32 (%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...)* @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %agg.tmp, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %3, i32 %4, i32 %mul)
          ; The i32 result of the enqueue call (%call3) is not used.
     80   ret void
     81 }
     82 
     83 ; Function Attrs: nounwind
        ; Invoke function of the block created in @host_kernel; its address is the
        ; first operand of the spir_block_bind call there.
        ; Parameters:
        ;   %.block_descriptor - the block descriptor, received as a plain i8*.
        ;   %ptr0, %ptr1       - i8 pointers in addrspace(3) (the __local void*
        ;                        parameters of the block in the repro.cl listing).
        ; The body casts both pointers to float addrspace(3)* and forwards them to
        ; @device_kernel. Returns nothing.
        ; The directive on the next line verifies that a definition with this name
        ; is present in the round-tripped module.
     84 ; CHECK-LABEL: define {{.*}} @__host_kernel_block_invoke
     85 define internal spir_func void @__host_kernel_block_invoke(i8* %.block_descriptor, i8 addrspace(3)* %ptr0, i8 addrspace(3)* %ptr1) #0 {
     86 entry:
          ; Stack slots for the three arguments and for the typed descriptor pointer.
     87   %.block_descriptor.addr = alloca i8*, align 8
     88   %ptr0.addr = alloca i8 addrspace(3)*, align 8
     89   %ptr1.addr = alloca i8 addrspace(3)*, align 8
     90   %block.addr = alloca <{}>*, align 8
     91   store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8
          ; The reloaded descriptor (%0) is never referenced again in this function.
     92   %0 = load i8*, i8** %.block_descriptor.addr
     93   store i8 addrspace(3)* %ptr0, i8 addrspace(3)** %ptr0.addr, align 8
     94   store i8 addrspace(3)* %ptr1, i8 addrspace(3)** %ptr1.addr, align 8
          ; View the descriptor as a pointer to an empty packed struct and save it;
          ; %block.addr is written here but never read in this function.
     95   %block = bitcast i8* %.block_descriptor to <{}>*
     96   store <{}>* %block, <{}>** %block.addr, align 8
          ; Convert both i8 addrspace(3)* arguments to float addrspace(3)*.
     97   %1 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr0.addr, align 8
     98   %2 = bitcast i8 addrspace(3)* %1 to float addrspace(3)*
     99   %3 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr1.addr, align 8
    100   %4 = bitcast i8 addrspace(3)* %3 to float addrspace(3)*
          ; device_kernel(ptr0, ptr1)
    101   call spir_func void @device_kernel(float addrspace(3)* %2, float addrspace(3)* %4)
    102   ret void
    103 }
    104 
    105 declare %opencl.block* @spir_block_bind(i8*, i32, i32, i8*)
        ; ^ Called once in @host_kernel to turn the invoke function pointer into a
        ;   %opencl.block* handle. Unlike the declarations below, it carries no
        ;   spir_func calling convention and no attribute group.
    106 
        ; Work-group size query for a block; takes the block handle, returns i32.
    107 declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block*) #1
    108 
        ; Preferred work-group size multiple query for a block; same signature.
    109 declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block*) #1
    110 
        ; Variadic enqueue_kernel overload used by @host_kernel. The directive on
        ; the next line verifies that the trailing ellipsis is still present in the
        ; declaration after the round trip (item 2 of the checks listed near the
        ; top of this file).
    111 ; CHECK: declare {{.*}} @_Z14enqueue_kernel{{.*}}U13block_pointerFvPU3AS3vzEjz({{.*}}, %opencl.block*, i32, ...)
    112 declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) #1
    113 
        ; get_default_queue(): no parameters, returns a queue handle.
    114 declare spir_func %opencl.queue_t* @_Z17get_default_queuev() #1
    115 
        ; ndrange_1D(i64): writes its ndrange_t result through the sret pointer.
    116 declare spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret, i64) #1
    117 
    118 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
        ; ^ Group #0 is attached to the three function definitions in this file.
        ;   Group #1 below is attached to the spir_func declarations and differs
        ;   from #0 only in lacking nounwind.
    119 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
    120 
        ; Named module metadata. The kernel list has a single entry (!0); the
        ; FP_CONTRACT list is empty; the last three lists all point at the empty
        ; node !8.
    121 !opencl.kernels = !{!0}
    122 !opencl.enable.FP_CONTRACT = !{}
    123 !opencl.spir.version = !{!6}
    124 !opencl.ocl.version = !{!7}
    125 !opencl.used.extensions = !{!8}
    126 !opencl.used.optional.core.features = !{!8}
    127 !opencl.compiler.options = !{!8}
    128 
        ; !0 ties @host_kernel to its per-argument description nodes !1 .. !5.
        ; Each of those nodes holds one value, matching the kernel's single
        ; argument (i32 %size, spelled "uint" in the listing).
    129 !0 = !{void (i32)* @host_kernel, !1, !2, !3, !4, !5}
    130 !1 = !{!"kernel_arg_addr_space", i32 0}
    131 !2 = !{!"kernel_arg_access_qual", !"none"}
    132 !3 = !{!"kernel_arg_type", !"uint"}
    133 !4 = !{!"kernel_arg_base_type", !"uint"}
    134 !5 = !{!"kernel_arg_type_qual", !""}
        ; !6 is referenced by !opencl.spir.version and !7 by !opencl.ocl.version.
        ; NOTE(review): presumably (major, minor) pairs, i.e. SPIR 1.2 and
        ; OpenCL 2.0 -- consistent with -cl-std=CL2.0 in the recorded command, but
        ; the encoding itself is not shown here; confirm.
    135 !6 = !{i32 1, i32 2}
    136 !7 = !{i32 2, i32 0}
    137 !8 = !{}
    138