Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck %s
      2 ; RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck %s
      4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
      5 
      6 ; This checks for a bug where uniform control flow can result in multiple
      7 ; v_cmp results being combined together with s_and_b64, s_or_b64 and s_xor_b64,
      8 ; using the resulting mask in s_cbranch_vccnz
      9 ; without ensuring that the resulting mask has bits clear for inactive lanes.
     10 ; The problematic case is s_xor_b64, as, unlike the other ops, it can actually
     11 ; set bits for inactive lanes.
     12 ;
     13 ; The check for an s_xor_b64 is just to check that this test tests what it is
     14 ; supposed to test. If the s_xor_b64 disappears due to some other cause, it does
     15 ; not necessarily mean that the bug has reappeared.
     16 ;
     17 ; The check for "s_and_b64 vcc, exec, something" checks that the bug is fixed.
     18 
     19 ; CHECK: {{^}}main:
     20 ; CHECK: s_xor_b64
     21 ; CHECK: s_and_b64 vcc, exec,
     22 
     23 define amdgpu_cs void @main(i32 inreg %arg) { ; %arg is never read in the body
     24 .entry:
     25   %tmp44 = load volatile <2 x float>, <2 x float> addrspace(1)* undef ; compared element-wise in bb43
     26   %tmp16 = load volatile float, float addrspace(1)* undef ; compared below in .entry
     27   %tmp22 = load volatile float, float addrspace(1)* undef ; compared in bb50
     28   %tmp25 = load volatile float, float addrspace(1)* undef ; compared in bb
     29   %tmp31 = fcmp olt float %tmp16, 0x3FA99999A0000000 ; threshold constant is ~0.05
     30   br i1 %tmp31, label %bb, label %.exit.thread ; any failed compare exits without storing
     31 
     32 bb:                                               ; preds = %.entry
     33   %tmp42 = fcmp olt float %tmp25, 0x3FA99999A0000000
     34   br i1 %tmp42, label %bb43, label %.exit.thread
     35 
     36 bb43:                                             ; preds = %bb
     37   %tmp46 = fcmp olt <2 x float> %tmp44, <float 0x3FA99999A0000000, float 0x3FA99999A0000000>
     38   %tmp47 = extractelement <2 x i1> %tmp46, i32 0
     39   %tmp48 = extractelement <2 x i1> %tmp46, i32 1
     40   %tmp49 = and i1 %tmp47, %tmp48 ; both vector elements must be below the threshold
     41   br i1 %tmp49, label %bb50, label %.exit.thread
     42 
     43 bb50:                                             ; preds = %bb43
     44   %tmp53 = fcmp olt float %tmp22, 0x3FA99999A0000000
     45   br i1 %tmp53, label %.exit3.i, label %.exit.thread
     46 
     47 .exit3.i:                                         ; preds = %bb50
     48   store volatile i32 0, i32 addrspace(1)* undef ; reached only when every compare above was true
     49   br label %.exit.thread
     50 
     51 .exit.thread:                                     ; preds = %.exit3.i, %bb50, %bb43, %bb, %.entry
     52   ret void
     53 }
     54 
     55