Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
      3 
      4 ; FUNC-LABEL: {{^}}s_abs_i32:
        ; Integer abs written as "the larger of %val and 0 - %val" on a by-value
        ; i32 argument, followed by an add of 2 and a store through %out.
        ; The two checks below expect s_abs_i32 and, later, s_add_i32 in the output.
        ; NOTE(review): the s_ forms are presumably expected because the input is
        ; a by-value argument rather than a loaded value -- confirm.
      5 ; GCN: s_abs_i32
      6 ; GCN: s_add_i32
      7 define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
      8   %neg = sub i32 0, %val                      ; negated input
      9   %cond = icmp sgt i32 %val, %neg             ; signed compare: is %val the larger of the pair?
     10   %res = select i1 %cond, i32 %val, i32 %neg  ; keep the larger one, i.e. |%val|
     11   %res2 = add i32 %res, 2                     ; extra user of the abs result; matched by the add check
     12   store i32 %res2, i32 addrspace(1)* %out, align 4
     13   ret void
     14 }
     15 
     16 ; FUNC-LABEL: {{^}}v_abs_i32:
        ; Same abs idiom (larger of %val and 0 - %val), but the input is loaded
        ; from %src instead of being passed by value.
        ; The checks expect a subtract-from-zero, then a v_max_i32 whose operands
        ; are exactly the registers captured as NEG and SRC on the subtract line,
        ; then a v_add_i32.
     17 ; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
     18 ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
     19 ; GCN: v_add_i32
     20 define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
     21   %val = load i32, i32 addrspace(1)* %src, align 4   ; input comes from memory here
     22   %neg = sub i32 0, %val                             ; negated input
     23   %cond = icmp sgt i32 %val, %neg                    ; signed compare of the value with its negation
     24   %res = select i1 %cond, i32 %val, i32 %neg         ; keep the larger one, i.e. |%val|
     25   %res2 = add i32 %res, 2                            ; extra user of the abs result; matched by the add check
     26   store i32 %res2, i32 addrspace(1)* %out, align 4
     27   ret void
     28 }
     29 
     30 ; FUNC-LABEL: {{^}}s_abs_v2i32:
        ; Two-element vector version of the abs idiom on a by-value <2 x i32>
        ; argument. The checks expect two s_abs_i32 followed by two s_add_i32,
        ; i.e. one of each per lane.
     31 ; GCN: s_abs_i32
     32 ; GCN: s_abs_i32
     33 ; GCN: s_add_i32
     34 ; GCN: s_add_i32
     35 define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
     36   %z0 = insertelement <2 x i32> undef, i32 0, i32 0
     37   %z1 = insertelement <2 x i32> %z0, i32 0, i32 1      ; %z1 = <0, 0>
     38   %t0 = insertelement <2 x i32> undef, i32 2, i32 0
     39   %t1 = insertelement <2 x i32> %t0, i32 2, i32 1      ; %t1 = <2, 2>
     40   %neg = sub <2 x i32> %z1, %val                       ; per-lane negation
     41   %cond = icmp sgt <2 x i32> %val, %neg                ; per-lane signed compare
     42   %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg  ; per-lane larger value, i.e. abs
     43   %res2 = add <2 x i32> %res, %t1                      ; add 2 to every lane
     44   store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
     45   ret void
     46 }
     47 
     48 ; FUNC-LABEL: {{^}}v_abs_v2i32:
        ; Two-element vector version of the abs idiom with the input loaded from
        ; %src. The checks expect, in this order: both subtract-from-zero
        ; instructions, both v_max_i32 (each reusing the NEGn/SRCn registers
        ; captured on the matching subtract line), then two v_add_i32.
     49 ; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
     50 ; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
     51 
     52 ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
     53 ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
     54 
     55 ; GCN: v_add_i32
     56 ; GCN: v_add_i32
     57 define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
     58   %z0 = insertelement <2 x i32> undef, i32 0, i32 0
     59   %z1 = insertelement <2 x i32> %z0, i32 0, i32 1      ; %z1 = <0, 0>
     60   %t0 = insertelement <2 x i32> undef, i32 2, i32 0
     61   %t1 = insertelement <2 x i32> %t0, i32 2, i32 1      ; %t1 = <2, 2>
     62   %val = load <2 x i32>, <2 x i32> addrspace(1)* %src, align 4  ; input comes from memory here
     63   %neg = sub <2 x i32> %z1, %val                       ; per-lane negation
     64   %cond = icmp sgt <2 x i32> %val, %neg                ; per-lane signed compare
     65   %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg  ; per-lane larger value, i.e. abs
     66   %res2 = add <2 x i32> %res, %t1                      ; add 2 to every lane
     67   store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
     68   ret void
     69 }
     70 
     71 ; FUNC-LABEL: {{^}}s_abs_v4i32:
     72 ; NOTE(review): this line used to read "TODO: this should use s_abs_i32", but the active checks below already require four s_abs_i32 -- the TODO looks stale; confirm against llc output.
        ; Four-element vector version of the abs idiom on a by-value <4 x i32>
        ; argument. The checks expect four s_abs_i32 followed by four s_add_i32,
        ; i.e. one of each per lane.
     73 ; GCN: s_abs_i32
     74 ; GCN: s_abs_i32
     75 ; GCN: s_abs_i32
     76 ; GCN: s_abs_i32
     77 
     78 ; GCN: s_add_i32
     79 ; GCN: s_add_i32
     80 ; GCN: s_add_i32
     81 ; GCN: s_add_i32
     82 define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
     83   %z0 = insertelement <4 x i32> undef, i32 0, i32 0
     84   %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
     85   %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
     86   %z3 = insertelement <4 x i32> %z2, i32 0, i32 3      ; %z3 = <0, 0, 0, 0>
     87   %t0 = insertelement <4 x i32> undef, i32 2, i32 0
     88   %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
     89   %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
     90   %t3 = insertelement <4 x i32> %t2, i32 2, i32 3      ; %t3 = <2, 2, 2, 2>
     91   %neg = sub <4 x i32> %z3, %val                       ; per-lane negation
     92   %cond = icmp sgt <4 x i32> %val, %neg                ; per-lane signed compare
     93   %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg  ; per-lane larger value, i.e. abs
     94   %res2 = add <4 x i32> %res, %t3                      ; add 2 to every lane
     95   store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
     96   ret void
     97 }
     98 
     99 ; FUNC-LABEL: {{^}}v_abs_v4i32:
        ; Four-element vector version of the abs idiom with the input loaded from
        ; %src. The checks expect, in this order: four subtract-from-zero
        ; instructions, four v_max_i32 (each reusing the NEGn/SRCn registers
        ; captured on the matching subtract line), then four v_add_i32.
    100 ; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
    101 ; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
    102 ; GCN: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
    103 ; GCN: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
    104 
    105 ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
    106 ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
    107 ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
    108 ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
    109 
    110 ; GCN: v_add_i32
    111 ; GCN: v_add_i32
    112 ; GCN: v_add_i32
    113 ; GCN: v_add_i32
    114 define void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
    115   %z0 = insertelement <4 x i32> undef, i32 0, i32 0
    116   %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
    117   %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
    118   %z3 = insertelement <4 x i32> %z2, i32 0, i32 3      ; %z3 = <0, 0, 0, 0>
    119   %t0 = insertelement <4 x i32> undef, i32 2, i32 0
    120   %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
    121   %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
    122   %t3 = insertelement <4 x i32> %t2, i32 2, i32 3      ; %t3 = <2, 2, 2, 2>
    123   %val = load <4 x i32>, <4 x i32> addrspace(1)* %src, align 4  ; input comes from memory here
    124   %neg = sub <4 x i32> %z3, %val                       ; per-lane negation
    125   %cond = icmp sgt <4 x i32> %val, %neg                ; per-lane signed compare
    126   %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg  ; per-lane larger value, i.e. abs
    127   %res2 = add <4 x i32> %res, %t3                      ; add 2 to every lane
    128   store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
    129   ret void
    130 }
    131