Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VIPLUS %s
      2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=VIPLUS %s
      3 
      4 ; FIXME: Need to handle non-uniform case for function below (load without gep).
      5 ; GCN-LABEL: {{^}}v_test_imax_sge_i16:
      6 ; VIPLUS: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
      7 define amdgpu_kernel void @v_test_imax_sge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
      8   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; work-item id, used as the element index into all three buffers
      9   %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
     10   %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
     11   %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
     12   %a = load i16, i16 addrspace(1)* %gep0, align 2 ; address is %aptr + 2*%tid, so only 2-byte alignment may be claimed
     13   %b = load i16, i16 addrspace(1)* %gep1, align 2
     14   %cmp = icmp sge i16 %a, %b
     15   %val = select i1 %cmp, i16 %a, i16 %b ; sge compare + select of the same operands: signed-max pattern
     16   store i16 %val, i16 addrspace(1)* %outgep, align 2
     17   ret void
     18 }
     19 
     20 ; FIXME: Need to handle non-uniform case for function below (load without gep).
     21 ; GCN-LABEL: {{^}}v_test_imax_sge_v2i16:
     22 ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     23 ; VI: v_max_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
     24 
     25 ; GFX9: v_pk_max_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     26 define amdgpu_kernel void @v_test_imax_sge_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %aptr, <2 x i16> addrspace(1)* %bptr) nounwind {
     27   %idx = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; work-item id selects which <2 x i16> element to process
     28   %lhs.ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %aptr, i32 %idx
     29   %rhs.ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %bptr, i32 %idx
     30   %dst.ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %idx
     31   %lhs = load <2 x i16>, <2 x i16> addrspace(1)* %lhs.ptr, align 4
     32   %rhs = load <2 x i16>, <2 x i16> addrspace(1)* %rhs.ptr, align 4
     33   %lhs.ge = icmp sge <2 x i16> %lhs, %rhs ; element-wise signed >=
     34   %max = select <2 x i1> %lhs.ge, <2 x i16> %lhs, <2 x i16> %rhs ; keep the larger element of each pair
     35   store <2 x i16> %max, <2 x i16> addrspace(1)* %dst.ptr, align 4
     36   ret void
     37 }
     38 
     39 ; FIXME: Need to handle non-uniform case for function below (load without gep).
     40 ; GCN-LABEL: {{^}}v_test_imax_sge_v3i16:
     41 ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     42 ; VI: v_max_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
     43 ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     44 ; VI-NOT: v_max_i16
     45 
     46 ; GFX9: v_pk_max_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     47 ; GFX9: v_pk_max_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     48 define amdgpu_kernel void @v_test_imax_sge_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %aptr, <3 x i16> addrspace(1)* %bptr) nounwind {
     49   %idx = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; work-item id selects which <3 x i16> element to process
     50   %lhs.ptr = getelementptr <3 x i16>, <3 x i16> addrspace(1)* %aptr, i32 %idx
     51   %rhs.ptr = getelementptr <3 x i16>, <3 x i16> addrspace(1)* %bptr, i32 %idx
     52   %dst.ptr = getelementptr <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %idx
     53   %lhs = load <3 x i16>, <3 x i16> addrspace(1)* %lhs.ptr, align 4
     54   %rhs = load <3 x i16>, <3 x i16> addrspace(1)* %rhs.ptr, align 4
     55   %lhs.ge = icmp sge <3 x i16> %lhs, %rhs ; element-wise signed >=
     56   %max = select <3 x i1> %lhs.ge, <3 x i16> %lhs, <3 x i16> %rhs ; keep the larger element of each pair
     57   store <3 x i16> %max, <3 x i16> addrspace(1)* %dst.ptr, align 4
     58   ret void
     59 }
     60 
     61 ; FIXME: Need to handle non-uniform case for function below (load without gep).
     62 ; GCN-LABEL: {{^}}v_test_imax_sge_v4i16:
     63 ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     64 ; VI: v_max_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
     65 ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     66 ; VI: v_max_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
     67 
     68 ; GFX9: v_pk_max_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     69 ; GFX9: v_pk_max_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     70 define amdgpu_kernel void @v_test_imax_sge_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %aptr, <4 x i16> addrspace(1)* %bptr) nounwind {
     71   %idx = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; work-item id selects which <4 x i16> element to process
     72   %lhs.ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %aptr, i32 %idx
     73   %rhs.ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %bptr, i32 %idx
     74   %dst.ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %idx
     75   %lhs = load <4 x i16>, <4 x i16> addrspace(1)* %lhs.ptr, align 4
     76   %rhs = load <4 x i16>, <4 x i16> addrspace(1)* %rhs.ptr, align 4
     77   %lhs.ge = icmp sge <4 x i16> %lhs, %rhs ; element-wise signed >=
     78   %max = select <4 x i1> %lhs.ge, <4 x i16> %lhs, <4 x i16> %rhs ; keep the larger element of each pair
     79   store <4 x i16> %max, <4 x i16> addrspace(1)* %dst.ptr, align 4
     80   ret void
     81 }
     82 
     83 ; FIXME: Need to handle non-uniform case for function below (load without gep).
     84 ; GCN-LABEL: {{^}}v_test_imax_sgt_i16:
     85 ; VIPLUS: v_max_i16_e32
     86 define amdgpu_kernel void @v_test_imax_sgt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
     87   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; work-item id, used as the element index into all three buffers
     88   %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
     89   %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
     90   %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
     91   %a = load i16, i16 addrspace(1)* %gep0, align 2 ; address is %aptr + 2*%tid, so only 2-byte alignment may be claimed
     92   %b = load i16, i16 addrspace(1)* %gep1, align 2
     93   %cmp = icmp sgt i16 %a, %b
     94   %val = select i1 %cmp, i16 %a, i16 %b ; sgt compare + select of the same operands: signed-max pattern
     95   store i16 %val, i16 addrspace(1)* %outgep, align 2
     96   ret void
     97 }
     98 
     99 ; FIXME: Need to handle non-uniform case for function below (load without gep).
    100 ; GCN-LABEL: {{^}}v_test_umax_uge_i16:
    101 ; VIPLUS: v_max_u16_e32
    102 define amdgpu_kernel void @v_test_umax_uge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
    103   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; work-item id, used as the element index into all three buffers
    104   %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
    105   %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
    106   %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
    107   %a = load i16, i16 addrspace(1)* %gep0, align 2 ; address is %aptr + 2*%tid, so only 2-byte alignment may be claimed
    108   %b = load i16, i16 addrspace(1)* %gep1, align 2
    109   %cmp = icmp uge i16 %a, %b
    110   %val = select i1 %cmp, i16 %a, i16 %b ; uge compare + select of the same operands: unsigned-max pattern
    111   store i16 %val, i16 addrspace(1)* %outgep, align 2
    112   ret void
    113 }
    114 
    115 ; FIXME: Need to handle non-uniform case for function below (load without gep).
    116 ; GCN-LABEL: {{^}}v_test_umax_ugt_i16:
    117 ; VIPLUS: v_max_u16_e32
    118 define amdgpu_kernel void @v_test_umax_ugt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
    119   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; work-item id, used as the element index into all three buffers
    120   %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
    121   %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
    122   %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
    123   %a = load i16, i16 addrspace(1)* %gep0, align 2 ; address is %aptr + 2*%tid, so only 2-byte alignment may be claimed
    124   %b = load i16, i16 addrspace(1)* %gep1, align 2
    125   %cmp = icmp ugt i16 %a, %b
    126   %val = select i1 %cmp, i16 %a, i16 %b ; ugt compare + select of the same operands: unsigned-max pattern
    127   store i16 %val, i16 addrspace(1)* %outgep, align 2
    128   ret void
    129 }
    130 
    131 ; GCN-LABEL: {{^}}v_test_umax_ugt_v2i16:
    132 ; VI: v_max_u16_e32
    133 ; VI: v_max_u16_sdwa
    134 
    135 ; GFX9: v_pk_max_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    136 define amdgpu_kernel void @v_test_umax_ugt_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %aptr, <2 x i16> addrspace(1)* %bptr) nounwind {
    137   %idx = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; work-item id selects which <2 x i16> element to process
    138   %lhs.ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %aptr, i32 %idx
    139   %rhs.ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %bptr, i32 %idx
    140   %dst.ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %idx
    141   %lhs = load <2 x i16>, <2 x i16> addrspace(1)* %lhs.ptr, align 4
    142   %rhs = load <2 x i16>, <2 x i16> addrspace(1)* %rhs.ptr, align 4
    143   %lhs.gt = icmp ugt <2 x i16> %lhs, %rhs ; element-wise unsigned >
    144   %max = select <2 x i1> %lhs.gt, <2 x i16> %lhs, <2 x i16> %rhs ; keep the larger element of each pair
    145   store <2 x i16> %max, <2 x i16> addrspace(1)* %dst.ptr, align 4
    146   ret void
    147 }
    148 
    149 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; intrinsic called by every kernel in this file to obtain its buffer index
    150