; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Codegen tests for v_sad_u32 selection on amdgcn (kaveri).
;
; Two IR shapes are exercised throughout this file:
;   pat1:  (select (a >u b), a, b) - (select (a <=u b), a, b) + c
;          i.e. unsigned max minus unsigned min, plus an accumulator.
;   pat2:  (select (a >u b), (a - b), (b - a)) + c
;
; Each test states, through its check lines, whether a single v_sad_u32 is
; expected or whether the pattern is expected to stay as separate
; instructions (extra uses of intermediate values, mismatched operands, ...).

; pat1 on scalar i32 kernel arguments; the checks expect one v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %b

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; pat1 with %b replaced by the constant 90 and the accumulator by the
; constant 20; the checks expect 20 to appear as the last v_sad_u32 operand.
; GCN-LABEL: {{^}}v_sad_u32_constant_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 20
define amdgpu_kernel void @v_sad_u32_constant_pat1(i32 addrspace(1)* %out, i32 %a) {
  %icmp0 = icmp ugt i32 %a, 90
  %t0 = select i1 %icmp0, i32 %a, i32 90

  %icmp1 = icmp ule i32 %a, 90
  %t1 = select i1 %icmp1, i32 %a, i32 90

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, 20

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; pat2 on scalar i32 kernel arguments; the checks expect one v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %sub0 = sub i32 %a, %b
  %sub1 = sub i32 %b, %a
  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1

  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; pat1 where the subtraction result %ret0 has a second use (a volatile store).
; The checks expect separate s_max_u32 / s_min_u32 / s_sub_i32 / s_add_i32
; rather than a v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat1:
; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_min_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %b

  %ret0 = sub i32 %t0, %t1
  store volatile i32 %ret0, i32 addrspace(5)*undef
  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; pat1 where only the final add result %ret has a second use (a volatile
; store); the checks still expect one v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_add_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %b

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, %c
  store volatile i32 %ret, i32 addrspace(5)*undef
  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; pat1 where the max-side select %t0 has a second use (a volatile store);
; the checks still expect one v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_multi_use_max_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b
  store volatile i32 %t0, i32 addrspace(5)*undef

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %b

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; pat1 where the min-side select %t1 has a second use (a volatile store);
; the checks still expect one v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_multi_use_min_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_min_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %b

  store volatile i32 %t1, i32 addrspace(5)*undef

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; pat2 where one of the two subtractions (%sub0) has a second use (a volatile
; store); the checks still expect one v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %sub0 = sub i32 %a, %b
  store volatile i32 %sub0, i32 addrspace(5)*undef
  %sub1 = sub i32 %b, %a
  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1

  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; pat2 where the select result %ret0 has a second use (a volatile store).
; The checks expect two s_sub_i32 and a v_cmp_gt_u32_e32 rather than a
; v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_multi_use_select_pat2:
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DAG: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_select_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %sub0 = sub i32 %a, %b
  %sub1 = sub i32 %b, %a
  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
  store volatile i32 %ret0, i32 addrspace(5)*undef

  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; pat1 on <4 x i32>; the checks expect four v_sad_u32, one per element.
; GCN-LABEL: {{^}}v_sad_u32_vector_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %icmp0 = icmp ugt <4 x i32> %a, %b
  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b

  %icmp1 = icmp ule <4 x i32> %a, %b
  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b

  %ret0 = sub <4 x i32> %t0, %t1
  %ret = add <4 x i32> %ret0, %c

  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
  ret void
}

; pat2 on <4 x i32>; the checks expect four v_sad_u32, one per element.
; GCN-LABEL: {{^}}v_sad_u32_vector_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %icmp0 = icmp ugt <4 x i32> %a, %b
  %sub0 = sub <4 x i32> %a, %b
  %sub1 = sub <4 x i32> %b, %a
  %ret0 = select <4 x i1> %icmp0, <4 x i32> %sub0, <4 x i32> %sub1

  %ret = add <4 x i32> %ret0, %c

  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
  ret void
}

; pat1 on i16 kernel arguments; the checks expect one v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_i16_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_i16_pat1(i16 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {

  %icmp0 = icmp ugt i16 %a, %b
  %t0 = select i1 %icmp0, i16 %a, i16 %b

  %icmp1 = icmp ule i16 %a, %b
  %t1 = select i1 %icmp1, i16 %a, i16 %b

  %ret0 = sub i16 %t0, %t1
  %ret = add i16 %ret0, %c

  store i16 %ret, i16 addrspace(1)* %out
  ret void
}

; pat2 on i16 values obtained from volatile loads instead of kernel
; arguments; the checks expect a v_sad_u32 whose operands are all VGPRs.
; GCN-LABEL: {{^}}v_sad_u32_i16_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_i16_pat2(i16 addrspace(1)* %out) {
  %a = load volatile i16, i16 addrspace(1)* undef
  %b = load volatile i16, i16 addrspace(1)* undef
  %c = load volatile i16, i16 addrspace(1)* undef
  %icmp0 = icmp ugt i16 %a, %b
  %sub0 = sub i16 %a, %b
  %sub1 = sub i16 %b, %a
  %ret0 = select i1 %icmp0, i16 %sub0, i16 %sub1

  %ret = add i16 %ret0, %c

  store i16 %ret, i16 addrspace(1)* %out
  ret void
}

; pat1 on i8 kernel arguments; the checks expect one v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_i8_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_i8_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
  %icmp0 = icmp ugt i8 %a, %b
  %t0 = select i1 %icmp0, i8 %a, i8 %b

  %icmp1 = icmp ule i8 %a, %b
  %t1 = select i1 %icmp1, i8 %a, i8 %b

  %ret0 = sub i8 %t0, %t1
  %ret = add i8 %ret0, %c

  store i8 %ret, i8 addrspace(1)* %out
  ret void
}

; pat2 on i8 values obtained from volatile loads instead of kernel
; arguments; the checks expect a v_sad_u32 whose operands are all VGPRs.
; GCN-LABEL: {{^}}v_sad_u32_i8_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_i8_pat2(i8 addrspace(1)* %out) {
  %a = load volatile i8, i8 addrspace(1)* undef
  %b = load volatile i8, i8 addrspace(1)* undef
  %c = load volatile i8, i8 addrspace(1)* undef
  %icmp0 = icmp ugt i8 %a, %b
  %sub0 = sub i8 %a, %b
  %sub1 = sub i8 %b, %a
  %ret0 = select i1 %icmp0, i8 %sub0, i8 %sub1

  %ret = add i8 %ret0, %c

  store i8 %ret, i8 addrspace(1)* %out
  ret void
}

; pat2 on i8 zeroext kernel arguments. The checks list a scalar sequence
; (s_load_dword, s_bfe_u32, s_sub_i32, s_and_b32, s_sub_i32, s_lshr_b32)
; followed by v_add_i32_e32; no v_sad_u32 is checked for here.
; GCN-LABEL: {{^}}s_sad_u32_i8_pat2:
; GCN: s_load_dword
; GCN: s_bfe_u32
; GCN: s_sub_i32
; GCN: s_and_b32
; GCN: s_sub_i32
; GCN: s_lshr_b32
; GCN: v_add_i32_e32
define amdgpu_kernel void @s_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) {
  %icmp0 = icmp ugt i8 %a, %b
  %sub0 = sub i8 %a, %b
  %sub1 = sub i8 %b, %a
  %ret0 = select i1 %icmp0, i8 %sub0, i8 %sub1

  %ret = add i8 %ret0, %c

  store i8 %ret, i8 addrspace(1)* %out
  ret void
}

; Near-miss of pat1: the second select picks between %a and %d (not %b), so
; %t1 is not the unsigned min of the same pair. The checks expect separate
; compare / max / sub / add instructions rather than a v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat1:
; GCN: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %d

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; Near-miss of pat2: %sub0 is (a - d) instead of (a - b). The checks expect
; two s_sub_i32 and a v_add_i32_e32 rather than a v_sad_u32.
; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat2:
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
  %icmp0 = icmp ugt i32 %a, %b
  %sub0 = sub i32 %a, %d
  %sub1 = sub i32 %b, %a
  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1

  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}