Home | History | Annotate | Download | only in transcoding
      1 ; RUN: llvm-as %s -o %t.bc
      2 ; RUN: llvm-spirv %t.bc -spirv-text -o %t.txt
      3 ; RUN: FileCheck < %t.txt %s --check-prefix=CHECK-SPIRV
      4 ; RUN: llvm-spirv %t.bc -o %t.spv
      5 ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
      6 ; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
      7 
        ; Pipeline driven by the commands above: assemble this module, translate it
        ; to SPIR-V twice (text form for the SPIR-V checks, binary form for the
        ; round trip), translate the binary back with -r, and disassemble the result
        ; for the LLVM check.
        ;
        ; Round-trip expectation: the kernel below calls async_work_group_copy
        ; mangled with Dv2_c (char2), while the regenerated module is expected to
        ; call async_work_group_strided_copy mangled with Dv2_h (uchar2) and one
        ; additional 'j' (unsigned int) parameter.
        ; NOTE(review): presumably the SPIR-V instruction always carries a stride
        ; operand and does not preserve signedness - confirm against the translator.
      8 ; CHECK-LLVM: call spir_func %opencl.event_t{{.*}}* @_Z29async_work_group_strided_copyPU3AS1Dv2_hPKU3AS3S_jj9ocl_event(
      9 
        ; SPIR-V text expectation: the third operand of GroupAsyncCopy (captured as
        ; Scope) must also appear as the second operand of some Constant line. The
        ; -DAG suffix lets the two lines match in either order.
     10 ; CHECK-SPIRV-DAG: GroupAsyncCopy {{[0-9]+}} {{[0-9]+}} [[Scope:[0-9]+]]
     11 ; CHECK-SPIRV-DAG: Constant {{[0-9]+}} [[Scope]]
     12 
     13 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ; little-endian ("e"), 32-bit pointers ("p:32:32")
     14 target triple = "spir-unknown-unknown" ; "spir" (not "spir64") triple, consistent with the 32-bit pointer layout above
     15 
     16 %opencl.event_t = type opaque ; opaque event type; only pointers to it are used in this file
     17 
     18 ; Function Attrs: nounwind
        ; Kernel under test. Unoptimized (alloca/load/store) IR that:
        ;   1. zero-fills this work-item's slice of %localBuffer,
        ;   2. copies the matching slice of %src into %localBuffer,
        ;   3. issues async_work_group_copy from %localBuffer to an offset in %dst
        ;      and waits on the returned event.
        ; Parameters:
        ;   %src, %dst           - <2 x i8> pointers in addrspace(1)
        ;   %localBuffer         - <2 x i8> pointer in addrspace(3)
        ;   %copiesPerWorkgroup  - element count passed to the async copy and used
        ;                          to scale the group id into the %dst offset
        ;   %copiesPerWorkItem   - trip count of both loops and per-work-item stride
     19 define spir_kernel void @test_fn(<2 x i8> addrspace(1)* %src, <2 x i8> addrspace(1)* %dst, <2 x i8> addrspace(3)* %localBuffer, i32 %copiesPerWorkgroup, i32 %copiesPerWorkItem) #0 {
     20 entry:
        ; One stack slot per parameter, plus the loop counter %i and the event handle.
     21   %src.addr = alloca <2 x i8> addrspace(1)*, align 4
     22   %dst.addr = alloca <2 x i8> addrspace(1)*, align 4
     23   %localBuffer.addr = alloca <2 x i8> addrspace(3)*, align 4
     24   %copiesPerWorkgroup.addr = alloca i32, align 4
     25   %copiesPerWorkItem.addr = alloca i32, align 4
     26   %i = alloca i32, align 4
     27   %event = alloca %opencl.event_t*, align 4
        ; Spill the incoming arguments into their slots.
     28   store <2 x i8> addrspace(1)* %src, <2 x i8> addrspace(1)** %src.addr, align 4
     29   store <2 x i8> addrspace(1)* %dst, <2 x i8> addrspace(1)** %dst.addr, align 4
     30   store <2 x i8> addrspace(3)* %localBuffer, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
     31   store i32 %copiesPerWorkgroup, i32* %copiesPerWorkgroup.addr, align 4
     32   store i32 %copiesPerWorkItem, i32* %copiesPerWorkItem.addr, align 4
        ; i = 0 for the first loop.
     33   store i32 0, i32* %i, align 4
     34   br label %for.cond
     35 
        ; First loop header: continue while i < copiesPerWorkItem (signed compare).
     36 for.cond:                                         ; preds = %for.inc, %entry
     37   %0 = load i32, i32* %i, align 4
     38   %1 = load i32, i32* %copiesPerWorkItem.addr, align 4
     39   %cmp = icmp slt i32 %0, %1
     40   br i1 %cmp, label %for.body, label %for.end
     41 
        ; First loop body: localBuffer[get_local_id(0) * copiesPerWorkItem + i] = <0, 0>.
     42 for.body:                                         ; preds = %for.cond
     43   %call = call spir_func i32 @_Z12get_local_idj(i32 0)
     44   %2 = load i32, i32* %copiesPerWorkItem.addr, align 4
     45   %mul = mul i32 %call, %2
     46   %3 = load i32, i32* %i, align 4
     47   %add = add i32 %mul, %3
     48   %4 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
     49   %arrayidx = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %4, i32 %add
     50   store <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* %arrayidx, align 2
     51   br label %for.inc
     52 
        ; First loop latch: i = i + 1.
     53 for.inc:                                          ; preds = %for.body
     54   %5 = load i32, i32* %i, align 4
     55   %inc = add nsw i32 %5, 1
     56   store i32 %inc, i32* %i, align 4
     57   br label %for.cond
     58 
        ; After the zero-fill: barrier(1), then reset i = 0 for the second loop.
     59 for.end:                                          ; preds = %for.cond
     60   call spir_func void @_Z7barrierj(i32 1)
     61   store i32 0, i32* %i, align 4
     62   br label %for.cond1
     63 
        ; Second loop header: continue while i < copiesPerWorkItem (signed compare).
     64 for.cond1:                                        ; preds = %for.inc12, %for.end
     65   %6 = load i32, i32* %i, align 4
     66   %7 = load i32, i32* %copiesPerWorkItem.addr, align 4
     67   %cmp2 = icmp slt i32 %6, %7
     68   br i1 %cmp2, label %for.body3, label %for.end14
     69 
        ; Second loop body:
        ;   localBuffer[get_local_id(0) * copiesPerWorkItem + i] =
        ;       src[get_global_id(0) * copiesPerWorkItem + i]
     70 for.body3:                                        ; preds = %for.cond1
     71   %call4 = call spir_func i32 @_Z13get_global_idj(i32 0)
     72   %8 = load i32, i32* %copiesPerWorkItem.addr, align 4
     73   %mul5 = mul i32 %call4, %8
     74   %9 = load i32, i32* %i, align 4
     75   %add6 = add i32 %mul5, %9
     76   %10 = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)** %src.addr, align 4
     77   %arrayidx7 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %10, i32 %add6
     78   %11 = load <2 x i8> , <2 x i8> addrspace(1)* %arrayidx7, align 2
     79   %call8 = call spir_func i32 @_Z12get_local_idj(i32 0)
     80   %12 = load i32, i32* %copiesPerWorkItem.addr, align 4
     81   %mul9 = mul i32 %call8, %12
     82   %13 = load i32, i32* %i, align 4
     83   %add10 = add i32 %mul9, %13
     84   %14 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
     85   %arrayidx11 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %14, i32 %add10
     86   store <2 x i8> %11, <2 x i8> addrspace(3)* %arrayidx11, align 2
     87   br label %for.inc12
     88 
        ; Second loop latch: i = i + 1.
     89 for.inc12:                                        ; preds = %for.body3
     90   %15 = load i32, i32* %i, align 4
     91   %inc13 = add nsw i32 %15, 1
     92   store i32 %inc13, i32* %i, align 4
     93   br label %for.cond1
     94 
        ; Tail: barrier(1), then the call this test exists for.
        ; Destination is dst + copiesPerWorkgroup * get_group_id(0); source is
        ; localBuffer; element count is copiesPerWorkgroup; incoming event is null.
     95 for.end14:                                        ; preds = %for.cond1
     96   call spir_func void @_Z7barrierj(i32 1)
     97   %16 = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)** %dst.addr, align 4
     98   %17 = load i32, i32* %copiesPerWorkgroup.addr, align 4
     99   %call15 = call spir_func i32 @_Z12get_group_idj(i32 0)
    100   %mul16 = mul i32 %17, %call15
    101   %add.ptr = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %16, i32 %mul16
    102   %18 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
    103   %19 = load i32, i32* %copiesPerWorkgroup.addr, align 4
    104   %call17 = call spir_func %opencl.event_t* @_Z21async_work_group_copyPU3AS1Dv2_cPKU3AS3S_j9ocl_event(<2 x i8> addrspace(1)* %add.ptr, <2 x i8> addrspace(3)* %18, i32 %19, %opencl.event_t* null)
        ; Keep the returned event in its stack slot, then pass the slot's address
        ; (cast to addrspace(4)) to wait_group_events with a count of 1.
    105   store %opencl.event_t* %call17, %opencl.event_t** %event, align 4
    106   %20 = addrspacecast %opencl.event_t** %event to %opencl.event_t* addrspace(4)*
    107   call spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i32 1, %opencl.event_t* addrspace(4)* %20)
    108   ret void
    109 }
    110 
    111 declare spir_func i32 @_Z12get_local_idj(i32) #1 ; demangles to get_local_id(unsigned int)
    112 
    113 declare spir_func void @_Z7barrierj(i32) #1 ; demangles to barrier(unsigned int)
    114 
    115 declare spir_func i32 @_Z13get_global_idj(i32) #1 ; demangles to get_global_id(unsigned int)
    116 
        ; async_work_group_copy taking an addrspace(1) char2 destination, a const
        ; addrspace(3) char2 source, an unsigned int count and an ocl_event. This is
        ; the call the round-trip check expects to come back under a different name.
    117 declare spir_func %opencl.event_t* @_Z21async_work_group_copyPU3AS1Dv2_cPKU3AS3S_j9ocl_event(<2 x i8> addrspace(1)*, <2 x i8> addrspace(3)*, i32, %opencl.event_t*) #1
    118 
    119 declare spir_func i32 @_Z12get_group_idj(i32) #1 ; demangles to get_group_id(unsigned int)
    120 
        ; wait_group_events(int, addrspace(4) pointer to ocl_event).
    121 declare spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i32, %opencl.event_t* addrspace(4)*) #1
    122 
        ; #0 is attached to the kernel definition, #1 to every declaration above.
        ; The two groups differ only in that #0 also carries nounwind.
    123 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
    124 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
    125 
    126 !opencl.kernels = !{!0} ; single kernel descriptor: !0 (test_fn)
    127 !opencl.enable.FP_CONTRACT = !{}
    128 !opencl.spir.version = !{!6} ; !6 = {1, 2}; presumably major/minor, i.e. SPIR 1.2 - TODO confirm
    129 !opencl.ocl.version = !{!7} ; !7 = {2, 0}; presumably major/minor, i.e. OpenCL 2.0 - TODO confirm
    130 !opencl.used.extensions = !{!8} ; !8 is the empty node
    131 !opencl.used.optional.core.features = !{!8}
    132 !opencl.compiler.options = !{!8}
    133 
        ; Kernel descriptor: the function pointer followed by five per-argument
        ; lists (!1..!5). Each list has one entry per parameter of test_fn, in
        ; parameter order.
    134 !0 = !{void (<2 x i8> addrspace(1)*, <2 x i8> addrspace(1)*, <2 x i8> addrspace(3)*, i32, i32)* @test_fn, !1, !2, !3, !4, !5}
    135 !1 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 3, i32 0, i32 0} ; matches the addrspace(1)/(1)/(3) pointer params; 0 for the two i32 params
    136 !2 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none", !"none"}
    137 !3 = !{!"kernel_arg_type", !"char2*", !"char2*", !"char2*", !"int", !"int"}
    138 !4 = !{!"kernel_arg_base_type", !"char2*", !"char2*", !"char2*", !"int", !"int"}
    139 !5 = !{!"kernel_arg_type_qual", !"const", !"", !"", !"", !""} ; only the first argument (%src) carries a qualifier
    140 !6 = !{i32 1, i32 2}
    141 !7 = !{i32 2, i32 0}
    142 !8 = !{}
    143