; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s

; Every function in this file writes integer absolute value as
;   select (icmp sgt x, (0 - x)), x, (0 - x)
; then adds 2 to the result and stores it through %out.
;
; The s_* functions receive the operand directly as an argument and are
; expected to produce s_abs_i32 followed by s_add_i32. The v_* functions load
; the operand through %src and are expected to produce a v_sub_i32 from zero,
; a v_max_i32 of the negated and original values, and a v_add_i32.

; i32 operand passed as an argument.
; FUNC-LABEL: {{^}}s_abs_i32:
; GCN: s_abs_i32
; GCN: s_add_i32
define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
  %negated = sub i32 0, %val
  %val.gt.neg = icmp sgt i32 %val, %negated
  %abs = select i1 %val.gt.neg, i32 %val, i32 %negated
  %abs.plus2 = add i32 %abs, 2
  store i32 %abs.plus2, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 operand loaded through %src; the max must consume the result of the
; subtract and the same source register the subtract read.
; FUNC-LABEL: {{^}}v_abs_i32:
; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
; GCN: v_add_i32
define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
  %loaded = load i32, i32 addrspace(1)* %src, align 4
  %negated = sub i32 0, %loaded
  %val.gt.neg = icmp sgt i32 %loaded, %negated
  %abs = select i1 %val.gt.neg, i32 %loaded, i32 %negated
  %abs.plus2 = add i32 %abs, 2
  store i32 %abs.plus2, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> operand passed as an argument; one abs and one add per lane.
; The all-zero and all-two vectors are built lane by lane with insertelement.
; FUNC-LABEL: {{^}}s_abs_v2i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_add_i32
; GCN: s_add_i32
define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
  %zero.l0 = insertelement <2 x i32> undef, i32 0, i32 0
  %zero.l1 = insertelement <2 x i32> %zero.l0, i32 0, i32 1
  %two.l0 = insertelement <2 x i32> undef, i32 2, i32 0
  %two.l1 = insertelement <2 x i32> %two.l0, i32 2, i32 1
  %negated = sub <2 x i32> %zero.l1, %val
  %val.gt.neg = icmp sgt <2 x i32> %val, %negated
  %abs = select <2 x i1> %val.gt.neg, <2 x i32> %val, <2 x i32> %negated
  %abs.plus2 = add <2 x i32> %abs, %two.l1
  store <2 x i32> %abs.plus2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> operand loaded through %src; each lane's max must pair the
; negated register with the source register of the matching subtract.
; FUNC-LABEL: {{^}}v_abs_v2i32:
; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]

; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]

; GCN: v_add_i32
; GCN: v_add_i32
define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
  %zero.l0 = insertelement <2 x i32> undef, i32 0, i32 0
  %zero.l1 = insertelement <2 x i32> %zero.l0, i32 0, i32 1
  %two.l0 = insertelement <2 x i32> undef, i32 2, i32 0
  %two.l1 = insertelement <2 x i32> %two.l0, i32 2, i32 1
  %loaded = load <2 x i32>, <2 x i32> addrspace(1)* %src, align 4
  %negated = sub <2 x i32> %zero.l1, %loaded
  %val.gt.neg = icmp sgt <2 x i32> %loaded, %negated
  %abs = select <2 x i1> %val.gt.neg, <2 x i32> %loaded, <2 x i32> %negated
  %abs.plus2 = add <2 x i32> %abs, %two.l1
  store <2 x i32> %abs.plus2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <4 x i32> operand passed as an argument; the checks expect s_abs_i32 to be
; selected once for each of the four lanes, followed by four adds.
; FUNC-LABEL: {{^}}s_abs_v4i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32

; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
  %zero.l0 = insertelement <4 x i32> undef, i32 0, i32 0
  %zero.l1 = insertelement <4 x i32> %zero.l0, i32 0, i32 1
  %zero.l2 = insertelement <4 x i32> %zero.l1, i32 0, i32 2
  %zero.l3 = insertelement <4 x i32> %zero.l2, i32 0, i32 3
  %two.l0 = insertelement <4 x i32> undef, i32 2, i32 0
  %two.l1 = insertelement <4 x i32> %two.l0, i32 2, i32 1
  %two.l2 = insertelement <4 x i32> %two.l1, i32 2, i32 2
  %two.l3 = insertelement <4 x i32> %two.l2, i32 2, i32 3
  %negated = sub <4 x i32> %zero.l3, %val
  %val.gt.neg = icmp sgt <4 x i32> %val, %negated
  %abs = select <4 x i1> %val.gt.neg, <4 x i32> %val, <4 x i32> %negated
  %abs.plus2 = add <4 x i32> %abs, %two.l3
  store <4 x i32> %abs.plus2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; <4 x i32> operand loaded through %src; four subtract/max pairs, each max
; tied to its own subtract by the captured registers, then four adds.
; FUNC-LABEL: {{^}}v_abs_v4i32:
; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]

; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]

; GCN: v_add_i32
; GCN: v_add_i32
; GCN: v_add_i32
; GCN: v_add_i32
define void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
  %zero.l0 = insertelement <4 x i32> undef, i32 0, i32 0
  %zero.l1 = insertelement <4 x i32> %zero.l0, i32 0, i32 1
  %zero.l2 = insertelement <4 x i32> %zero.l1, i32 0, i32 2
  %zero.l3 = insertelement <4 x i32> %zero.l2, i32 0, i32 3
  %two.l0 = insertelement <4 x i32> undef, i32 2, i32 0
  %two.l1 = insertelement <4 x i32> %two.l0, i32 2, i32 1
  %two.l2 = insertelement <4 x i32> %two.l1, i32 2, i32 2
  %two.l3 = insertelement <4 x i32> %two.l2, i32 2, i32 3
  %loaded = load <4 x i32>, <4 x i32> addrspace(1)* %src, align 4
  %negated = sub <4 x i32> %zero.l3, %loaded
  %val.gt.neg = icmp sgt <4 x i32> %loaded, %negated
  %abs = select <4 x i1> %val.gt.neg, <4 x i32> %loaded, <4 x i32> %negated
  %abs.plus2 = add <4 x i32> %abs, %two.l3
  store <4 x i32> %abs.plus2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}