Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
      2 
      3 declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 %clamp) ; intrinsic under test: returns i32, takes three i32 operands and an i1 clamp flag
      4 
      5 ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_clamp
      6 ; GFX906: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
        ; Clamp-enabled case. The kernel loads one <8 x i4> vector through each of
        ; %a and %b, bitcasts both vectors to i32, loads the i32 at %c, and passes
        ; the three values to @llvm.amdgcn.udot8 with the i1 clamp argument set
        ; to 1. The call result is stored through %r.
        ; The check line above requires a v_dot8_u32_u4 instruction whose operand
        ; list is followed by the "clamp" modifier; the trailing {{$}} pins the
        ; match to the end of the output line.
      7 define amdgpu_kernel void @test_llvm_amdgcn_udot8_clamp(
      8     i32 addrspace(1)* %r,
      9     <8 x i4> addrspace(1)* %a,
     10     <8 x i4> addrspace(1)* %b,
     11     i32 addrspace(1)* %c) {
     12 entry:
     13   %a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a
     14   %b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b
     15   %a.val.cast = bitcast <8 x i4> %a.val to i32 ; the intrinsic takes i32 operands, so the i4 vector is reinterpreted as one i32
     16   %b.val.cast = bitcast <8 x i4> %b.val to i32
     17   %c.val = load i32, i32 addrspace(1)* %c
     18   %r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1) ; last argument i1 1 = clamp requested
     19   store i32 %r.val, i32 addrspace(1)* %r
     20   ret void
     21 }
     22 
     23 ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_no_clamp
     24 ; GFX906: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
        ; Clamp-disabled case. The kernel loads one <8 x i4> vector through each of
        ; %a and %b, bitcasts both vectors to i32, loads the i32 at %c, and passes
        ; the three values to @llvm.amdgcn.udot8 with the i1 clamp argument set
        ; to 0. The call result is stored through %r.
        ; The check line above places {{$}} directly after the last register
        ; operand, so the match fails if anything (such as a "clamp" modifier)
        ; follows the operand list on that output line.
     25 define amdgpu_kernel void @test_llvm_amdgcn_udot8_no_clamp(
     26     i32 addrspace(1)* %r,
     27     <8 x i4> addrspace(1)* %a,
     28     <8 x i4> addrspace(1)* %b,
     29     i32 addrspace(1)* %c) {
     30 entry:
     31   %a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a
     32   %b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b
     33   %a.val.cast = bitcast <8 x i4> %a.val to i32 ; the intrinsic takes i32 operands, so the i4 vector is reinterpreted as one i32
     34   %b.val.cast = bitcast <8 x i4> %b.val to i32
     35   %c.val = load i32, i32 addrspace(1)* %c
     36   %r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0) ; last argument i1 0 = no clamp
     37   store i32 %r.val, i32 addrspace(1)* %r
     38   ret void
     39 }
     40