; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Comparison lowering test.  Every kernel below performs a single icmp/fcmp,
; sign-extends the i1 (or vector of i1) result and stores it through a global
; (addrspace(1)) pointer.  The FileCheck directives in front of each kernel pin
; the mnemonics expected from the r600/redwood run and from the amdgcn run.

; Work-item id intrinsic; used by the <3 x ...> tests to index their buffers.
declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; Vector equality on <2 x i32> kernel arguments: the r600 run expects one
; SETE_INT per element, reading the operands from constant-buffer registers.
; FUNC-LABEL: {{^}}setcc_v2i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y

define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) {
  %cmp = icmp eq <2 x i32> %a, %b
  %ext = sext <2 x i1> %cmp to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Vector equality on <4 x i32> values loaded from two consecutive vectors at
; %in: the r600 run expects four register-to-register SETE_INT instructions.
; FUNC-LABEL: {{^}}setcc_v4i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %in.next = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %lhs = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %rhs = load <4 x i32>, <4 x i32> addrspace(1)* %in.next
  %cmp = icmp eq <4 x i32> %lhs, %rhs
  %ext = sext <4 x i1> %cmp to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

;;;==========================================================================;;;
;; Float comparisons
;;;==========================================================================;;;

; One kernel per fcmp predicate.  Each compares the two float arguments,
; sign-extends the i1 to i32 and stores it to %out.

; Ordered equal.
; FUNC-LABEL: {{^}}f32_oeq:
; R600: SETE_DX10
; SI: v_cmp_eq_f32
define void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp oeq float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Ordered greater-than.
; FUNC-LABEL: {{^}}f32_ogt:
; R600: SETGT_DX10
; SI: v_cmp_gt_f32
define void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp ogt float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Ordered greater-or-equal.
; FUNC-LABEL: {{^}}f32_oge:
; R600: SETGE_DX10
; SI: v_cmp_ge_f32
define void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp oge float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Ordered less-than.  The r600 expectation is the same SETGT_DX10 mnemonic as
; for ogt; the operands are not checked here.
; FUNC-LABEL: {{^}}f32_olt:
; R600: SETGT_DX10
; SI: v_cmp_lt_f32
define void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp olt float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Ordered less-or-equal.  The r600 expectation is the same SETGE_DX10 mnemonic
; as for oge; the operands are not checked here.
; FUNC-LABEL: {{^}}f32_ole:
; R600: SETGE_DX10
; SI: v_cmp_le_f32
define void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp ole float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Ordered not-equal.  The r600 run is expected to build the result from several
; compares combined with AND_INT; the amdgcn run expects a single v_cmp_lg
; followed immediately by the select that materialises 0 / -1.
; FUNC-LABEL: {{^}}f32_one:
; R600-DAG: SETE_DX10
; R600-DAG: SETE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT

; SI: v_cmp_lg_f32_e32 vcc
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp one float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Ordered (neither operand is NaN).
; FUNC-LABEL: {{^}}f32_ord:
; R600-DAG: SETE_DX10
; R600-DAG: SETE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT
; SI: v_cmp_o_f32
define void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp ord float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unordered or equal.  The r600 run is expected to combine several compares
; with OR_INT; the amdgcn run expects v_cmp_nlg followed immediately by the
; 0 / -1 select.
; FUNC-LABEL: {{^}}f32_ueq:
; R600-DAG: SETNE_DX10
; R600-DAG: SETNE_DX10
; R600-DAG: OR_INT
; R600-DAG: SETE_DX10
; R600-DAG: OR_INT
; R600-DAG: SETNE_INT

; SI: v_cmp_nlg_f32_e32 vcc
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp ueq float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unordered or greater-than.  The amdgcn run expects the negated "le" compare.
; FUNC-LABEL: {{^}}f32_ugt:
; R600: SETGE
; R600: SETE_DX10
; SI: v_cmp_nle_f32_e32 vcc
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp ugt float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unordered or greater-or-equal.  The amdgcn run expects the negated "lt"
; compare.
; FUNC-LABEL: {{^}}f32_uge:
; R600: SETGT
; R600: SETE_DX10

; SI: v_cmp_nlt_f32_e32 vcc
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp uge float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unordered or less-than.  The amdgcn run expects the negated "ge" compare.
; FUNC-LABEL: {{^}}f32_ult:
; R600: SETGE
; R600: SETE_DX10

; SI: v_cmp_nge_f32_e32 vcc
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp ult float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unordered or less-or-equal.  The amdgcn run expects the negated "gt" compare.
; FUNC-LABEL: {{^}}f32_ule:
; R600: SETGT
; R600: SETE_DX10

; SI: v_cmp_ngt_f32_e32 vcc
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp ule float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unordered or not-equal.
; FUNC-LABEL: {{^}}f32_une:
; R600: SETNE_DX10
; SI: v_cmp_neq_f32
define void @f32_une(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp une float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unordered (at least one operand is NaN).  Unlike f32_ord, the r600
; expectations here are plain in-order checks rather than -DAG checks.
; FUNC-LABEL: {{^}}f32_uno:
; R600: SETNE_DX10
; R600: SETNE_DX10
; R600: OR_INT
; R600: SETNE_INT
; SI: v_cmp_u_f32
define void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp uno float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

;;;==========================================================================;;;
;; 32-bit integer comparisons
;;;==========================================================================;;;

; One kernel per icmp predicate.  Each compares the two i32 arguments,
; sign-extends the i1 to i32 and stores it to %out.  For the "less" predicates
; the r600 run expects the same SETGT/SETGE mnemonic as for the matching
; "greater" predicate; the operands are not checked.

; Equal.
; FUNC-LABEL: {{^}}i32_eq:
; R600: SETE_INT
; SI: v_cmp_eq_i32
define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp eq i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Not equal.
; FUNC-LABEL: {{^}}i32_ne:
; R600: SETNE_INT
; SI: v_cmp_ne_i32
define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp ne i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unsigned greater-than.
; FUNC-LABEL: {{^}}i32_ugt:
; R600: SETGT_UINT
; SI: v_cmp_gt_u32
define void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp ugt i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unsigned greater-or-equal.
; FUNC-LABEL: {{^}}i32_uge:
; R600: SETGE_UINT
; SI: v_cmp_ge_u32
define void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp uge i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unsigned less-than.
; FUNC-LABEL: {{^}}i32_ult:
; R600: SETGT_UINT
; SI: v_cmp_lt_u32
define void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp ult i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unsigned less-or-equal.
; FUNC-LABEL: {{^}}i32_ule:
; R600: SETGE_UINT
; SI: v_cmp_le_u32
define void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp ule i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Signed greater-than.
; FUNC-LABEL: {{^}}i32_sgt:
; R600: SETGT_INT
; SI: v_cmp_gt_i32
define void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp sgt i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Signed greater-or-equal.
; FUNC-LABEL: {{^}}i32_sge:
; R600: SETGE_INT
; SI: v_cmp_ge_i32
define void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp sge i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Signed less-than.
; FUNC-LABEL: {{^}}i32_slt:
; R600: SETGT_INT
; SI: v_cmp_lt_i32
define void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp slt i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Signed less-or-equal.
; FUNC-LABEL: {{^}}i32_sle:
; R600: SETGE_INT
; SI: v_cmp_le_i32
define void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp sle i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Equality on a non-power-of-two vector, <3 x i32>.  Each work-item indexes the
; three buffers by its x id.  Only the amdgcn run has expectations: three
; compare/select pairs in any order, then the end of the program.
; FIXME: This does 4 compares
; FUNC-LABEL: {{^}}v3i32_eq:
; SI-DAG: v_cmp_eq_i32
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; SI-DAG: v_cmp_eq_i32
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; SI-DAG: v_cmp_eq_i32
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; SI: s_endpgm
define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.ptr = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %lane
  %b.ptr = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %lane
  %out.ptr = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %lane
  %a.val = load <3 x i32>, <3 x i32> addrspace(1)* %a.ptr
  %b.val = load <3 x i32>, <3 x i32> addrspace(1)* %b.ptr
  %eq = icmp eq <3 x i32> %a.val, %b.val
  %eq.sext = sext <3 x i1> %eq to <3 x i32>
  store <3 x i32> %eq.sext, <3 x i32> addrspace(1)* %out.ptr
  ret void
}

; Same shape as v3i32_eq but on <3 x i8> elements.  The amdgcn expectations are
; still 32-bit v_cmp_eq_i32 compares, three compare/select pairs in any order.
; FUNC-LABEL: {{^}}v3i8_eq:
; SI-DAG: v_cmp_eq_i32
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; SI-DAG: v_cmp_eq_i32
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; SI-DAG: v_cmp_eq_i32
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; SI: s_endpgm
define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.ptr = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %lane
  %b.ptr = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %lane
  %out.ptr = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %lane
  %a.val = load <3 x i8>, <3 x i8> addrspace(1)* %a.ptr
  %b.val = load <3 x i8>, <3 x i8> addrspace(1)* %b.ptr
  %eq = icmp eq <3 x i8> %a.val, %b.val
  %eq.sext = sext <3 x i1> %eq to <3 x i8>
  store <3 x i8> %eq.sext, <3 x i8> addrspace(1)* %out.ptr
  ret void
}