Home | History | Annotate | Download | only in transcoding
      1 ;; Test that the reader correctly mangles the get_kernel_work_group_size,
      2 ;; get_kernel_preferred_work_group_size_multiple, and enqueue_kernel built-ins and
      3 ;; produces spir_block_bind for both blocks, one of which has no captured context.
      4 ;; Note that, for the moment, spir_block_bind is called once for every built-in
      5 ;; that uses a block. This does not violate the SPIR 2.0 specification, so it is done
      6 ;; this way to simplify the reader implementation.
      7 ;;
      8 ;; See below how this LLVM IR has been obtained:
      9 ;; bash$
     10 ;; bash$ cat device_execution_multiple_blocks.cl
     11 ;; void block_fn(int arg, __global int* res)
     12 ;; {
     13 ;;   *res = arg;
     14 ;; }
     15 ;;
     16 ;; __global int glbRes = 0;
     17 ;; void (^kernelBlockNoCtx)(void) = ^{ block_fn(1, &glbRes); };
     18 ;;
     19 ;; kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)
     20 ;; {
     21 ;;
     22 ;;
     23 ;;     void (^kernelBlock)(void) = ^{ block_fn(2, res); };
     24 ;;     uint globalSize = get_kernel_work_group_size(kernelBlock);
     25 ;;     uint multiple   = get_kernel_preferred_work_group_size_multiple(kernelBlock);
     26 ;;     uint localSize  = globalSize / multiple;
     27 ;;
     28 ;;     queue_t q1 = get_default_queue();
     29 ;;     ndrange_t ndrange = ndrange_1D(localSize, globalSize);
     30 ;;     enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
     31 ;;     // Enqueue kernel w\o captured context
     32 ;;     enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlockNoCtx);
     33 ;; }
     34 ;; bash$
     35 ;; bash$ export PATH_TO_GEN=path_to_spir20_generator_install_dir
     36 ;; bash$ $PATH_TO_GEN/bin/clang -cc1 -x cl -O2 -cl-std=CL2.0 -triple spir64-unknonw-unknown\
     37 ;; -emit-spirv -include $PATH_TO_GEN/lib/clang/3.6.1/include/opencl-20.h\
     38 ;; device_execution_multiple_blocks.cl -o device_execution_multiple_blocks.ll
     39 
     40 ; RUN: llvm-as %s -o %t.bc
     41 ; RUN: llvm-spirv %t.bc -o %t.spv
     42 ; RUN: llvm-spirv -r %t.spv -o %t.bc
     43 ; RUN: llvm-dis < %t.bc | FileCheck %s
     44 
     45 ; ModuleID = 'device_execution_multiple_blocks.cl'
     46 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
        ; NOTE(review): the vendor component of the triple below is spelled "unknonw", the same
        ; spelling used in the clang command quoted in the file header. Presumably a typo for
        ; "unknown" -- confirm before changing it, since this line is part of the test input.
     47 target triple = "spir64-unknonw-unknown"
     48 
     49 %struct.ndrange_t = type { i32, [3 x i64], [3 x i64], [3 x i64] } ; filled in by @_Z10ndrange_1Dmm (sret) and passed byval to enqueue_kernel
     50 %opencl.block = type opaque ; result type of @spir_block_bind; operand of the block built-ins
     51 %opencl.queue_t = type opaque ; result type of @get_default_queue
     52 
     53 @glbRes = addrspace(1) global i32 0, align 4 ; written by @kernelBlockNoCtx_block_invoke
     54 
     55 ; Function Attrs: nounwind
        ; Helper from the OpenCL source: writes %arg to the address-space-1 location %res.
        ; No call to it appears in the IR shown in this file; both block invoke functions
        ; perform their store directly.
     56 define spir_func void @block_fn(i32 %arg, i32 addrspace(1)* nocapture %res) #0 {
     57 entry:
     58   store i32 %arg, i32 addrspace(1)* %res, align 4
     59   ret void
     60 }
     61 
     62 ; Function Attrs: nounwind
        ; Invoke function of the block that has no captured context. The %.block_descriptor
        ; argument is not read (it is marked readnone); the body only stores 1 to @glbRes.
        ; The kernel binds this function with @spir_block_bind using i32 0, i32 0 and a null
        ; context pointer.
     63 define internal spir_func void @kernelBlockNoCtx_block_invoke(i8* nocapture readnone %.block_descriptor) #0 {
     64 entry:
     65   store i32 1, i32 addrspace(1)* @glbRes, align 4
     66   ret void
     67 }
     68 
     69 ; Function Attrs: nounwind
        ; Kernel under test. It binds a block that captures %res, queries that block's
        ; work-group size and preferred work-group size multiple, and enqueues it; it then
        ; binds and enqueues a second block that has no captured context.
        ;
        ; The input IR calls @spir_block_bind once for the capturing block and reuses %1 for
        ; three built-ins. The FileCheck directives interleaved below describe the IR expected
        ; after the round trip, where a separate @spir_block_bind call precedes every built-in
        ; that takes a block (captured as BLOCK0..BLOCK3).
        ;
        ; NOTE(review): the CTX and BLOCKn variables are captured with a leading ".*" and later
        ; used directly after "i8*" / "%opencl.block*" with no explicit space, so the directives
        ; appear to rely on the capture including the whitespace before the value name --
        ; confirm before reformatting any of them.
     70 define spir_kernel void @enqueue_block_get_kernel_preferred_work_group_size_multiple(i32 addrspace(1)* %res) #0 {
     71 entry:
          ; Captured context of the block: a packed struct holding only the %res pointer.
     72   %captured = alloca <{ i32 addrspace(1)* }>, align 8
     73   %ndrange = alloca %struct.ndrange_t, align 8
     74   %block.captured = getelementptr inbounds <{ i32 addrspace(1)* }>, <{ i32 addrspace(1)* }>* %captured, i64 0, i32 0
     75   store i32 addrspace(1)* %res, i32 addrspace(1)** %block.captured, align 8
          ; The context is handed to @spir_block_bind as an untyped i8*.
     76   %0 = bitcast <{ i32 addrspace(1)* }>* %captured to i8*
     77 ; CHECK: [[CTX:.*]] = bitcast %0* %captured to i8*
          ; Bind the capturing block. The two "i32 8" operands are presumably the size and
          ; alignment of the captured context (a single pointer, align 8) -- TODO confirm.
     78   %1 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke to i8*), i32 8, i32 8, i8* %0) #2
     79 ; CHECK: [[BLOCK0:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]])
     80 ; CHECK: call {{.*}} @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*[[BLOCK0]])
          ; globalSize in the OpenCL source quoted at the top of the file.
     81   %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block* %1) #2
     82 ; CHECK: [[BLOCK1:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]])
     83 ; CHECK:  call {{.*}} @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*[[BLOCK1]])
          ; multiple in the OpenCL source.
     84   %call1 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block* %1) #2
          ; localSize = globalSize / multiple (unsigned division).
     85   %div = udiv i32 %call, %call1
     86   %call2 = call spir_func %opencl.queue_t* @get_default_queue() #2
          ; Both sizes are zero-extended to i64 for ndrange_1D(localSize, globalSize), which
          ; writes its result into %ndrange through the sret pointer.
     87   %conv = zext i32 %div to i64
     88   %conv3 = zext i32 %call to i64
     89   call spir_func void @_Z10ndrange_1Dmm(%struct.ndrange_t* sret %ndrange, i64 %conv, i64 %conv3) #2
     90 ; CHECK: [[BLOCK2:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]])
     91 ; CHECK:  call {{.*}} @_Z14enqueue_kernel{{.*}}, %opencl.block*[[BLOCK2]])
          ; Enqueue the capturing block. "i32 241" is the flags operand; the OpenCL source
          ; quoted at the top of the file passes CLK_ENQUEUE_FLAGS_WAIT_KERNEL here.
     92   %call4 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %ndrange, %opencl.block* %1) #2
     93 ; CHECK: [[BLOCK3:.*]] = call {{.*}} @spir_block_bind({{.*}}@kernelBlockNoCtx_block_invoke{{.*}}, i32 0, i32 0, i8* null)
     94 ; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}, %opencl.block*[[BLOCK3]])
          ; Bind and enqueue the block without captured context: i32 0, i32 0 and a null
          ; context pointer, reusing the same queue and ndrange.
     95   %2 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @kernelBlockNoCtx_block_invoke to i8*), i32 0, i32 0, i8* null) #2
     96   %call5 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %ndrange, %opencl.block* %2) #2
     97   ret void
     98 }
     99 
    100 ; Function Attrs: nounwind
        ; Invoke function of the block that captures the kernel's %res argument.
        ; %.block_descriptor is reinterpreted as a pointer to the captured address-space-1
        ; pointer; that pointer is loaded and the value 2 is stored through it.
    101 define internal spir_func void @__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke(i8* nocapture readonly %.block_descriptor) #0 {
    102 entry:
    103   %block.capture.addr = bitcast i8* %.block_descriptor to i32 addrspace(1)**
    104   %0 = load i32 addrspace(1)*, i32 addrspace(1)** %block.capture.addr, align 8
    105   store i32 2, i32 addrspace(1)* %0, align 4
    106   ret void
    107 }
    108 
    109 declare %opencl.block* @spir_block_bind(i8*, i32, i32, i8*)
        ; ^ Called in the kernel with (invoke function cast to i8*, i32, i32, context pointer).
        ;   The two i32 operands are presumably the context size and alignment -- TODO confirm
        ;   against the SPIR 2.0 specification.
    110 
        ; Built-ins that take a block; the FileCheck directives in the kernel match these
        ; mangled names after the round trip.
    111 declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*) #1
    112 
    113 declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*) #1
    114 
        ; Declared and called under its unmangled name in this file.
    115 declare spir_func %opencl.queue_t* @get_default_queue() #1
    116 
        ; Returns its %struct.ndrange_t result through the sret pointer.
    117 declare spir_func void @_Z10ndrange_1Dmm(%struct.ndrange_t* sret, i64, i64) #1
    118 
        ; Operands as used by the kernel: queue, i32 flags, ndrange (byval), block.
    119 declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t*, i32, %struct.ndrange_t* byval, %opencl.block*) #1
    120 
    121 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
    122 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
    123 attributes #2 = { nounwind }
        ; Usage in this file: #0 is attached to the four function definitions; #1 (the same
        ; set without nounwind) to the built-in declarations other than @spir_block_bind;
        ; #2 to the call sites inside the kernel.
    124 
    125 !opencl.kernels = !{!0}
        ; ^ !0 lists the single kernel together with its argument metadata nodes !1-!5.
    126 !opencl.enable.FP_CONTRACT = !{}
    127 !opencl.spir.version = !{!6}
    128 !opencl.ocl.version = !{!7}
        ; The three named nodes below all point at the empty node !8.
    129 !opencl.used.extensions = !{!8}
    130 !opencl.used.optional.core.features = !{!8}
    131 !opencl.compiler.options = !{!8}
    132 !llvm.ident = !{!9}
    133 
    134 !0 = !{void (i32 addrspace(1)*)* @enqueue_block_get_kernel_preferred_work_group_size_multiple, !1, !2, !3, !4, !5}
        ; Per-argument descriptions for the kernel's single argument %res.
    135 !1 = !{!"kernel_arg_addr_space", i32 1}
    136 !2 = !{!"kernel_arg_access_qual", !"none"}
    137 !3 = !{!"kernel_arg_type", !"int*"}
    138 !4 = !{!"kernel_arg_base_type", !"int*"}
    139 !5 = !{!"kernel_arg_type_qual", !""}
        ; !6 is referenced by !opencl.spir.version (values 1, 2); !7 by !opencl.ocl.version
        ; (values 2, 0).
    140 !6 = !{i32 1, i32 2}
    141 !7 = !{i32 2, i32 0}
    142 !8 = !{}
    143 !9 = !{!"clang version 3.6.1 (https://github.com/KhronosGroup/SPIR.git 49a8b4a760d227b12116a79b2f7b2e34ef2e6879) (ssh://nnopencl-git-01.inn.intel.com/home/git/repo/opencl_qa-llvm d9b98710f905089caec167209da23af2e4f72bf0)"}
    144