Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc < %s -march=amdgcn -mcpu=verde -asm-verbose=0 -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
      2 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -asm-verbose=0 -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
      3 
      4 ; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:
      5 
      6 ; SI: [[LOOP_LABEL:[A-Z0-9]+]]:
      7 ; Lowered break instructin:
      8 ; SI: s_or_b64
      9 ; Lowered Loop instruction:
     10 ; SI: s_andn2_b64
     11 ; s_cbranch_execnz [[LOOP_LABEL]]
     12 ; SI: s_endpgm
     13 define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
     14 main_body:
     15   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
     16   %0 = and i32 %a, %tid
     17   %1 = trunc i32 %0 to i1
     18   br label %ENDIF
     19 
     20 ENDLOOP:
     21   store i32 0, i32 addrspace(1)* %out
     22   ret void
     23 
     24 ENDIF:
     25   br i1 %1, label %ENDLOOP, label %ENDIF
     26 }
     27 
     28 
     29 ; FUNC-LABEL: {{^}}phi_cond_outside_loop:
     30 ; FIXME: This could be folded into the s_or_b64 instruction
     31 ; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0
     32 ; SI: [[LOOP_LABEL:[A-Z0-9]+]]
     33 ; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
     34 
     35 ; SI_IF_BREAK instruction:
     36 ; SI: s_or_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], vcc, [[ZERO]]
     37 
     38 ; SI_LOOP instruction:
     39 ; SI: s_andn2_b64 exec, exec, [[BREAK]]
     40 ; SI: s_cbranch_execnz [[LOOP_LABEL]]
     41 ; SI: s_endpgm
     42 
     43 define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
     44 entry:
     45   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
     46   %0 = icmp eq i32 %tid , 0
     47   br i1 %0, label %if, label %else
     48 
     49 if:
     50   br label %endif
     51 
     52 else:
     53   %1 = icmp eq i32 %b, 0
     54   br label %endif
     55 
     56 endif:
     57   %2 = phi i1 [0, %if], [%1, %else]
     58   br label %loop
     59 
     60 loop:
     61   br i1 %2, label %exit, label %loop
     62 
     63 exit:
     64   ret void
     65 }
     66 
     67 ; FIXME: should emit s_endpgm
     68 ; CHECK-LABEL: {{^}}switch_unreachable:
     69 ; CHECK-NOT: s_endpgm
     70 ; CHECK: .Lfunc_end2
     71 define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
     72 centry:
     73   switch i32 %x, label %sw.default [
     74     i32 0, label %sw.bb
     75     i32 60, label %sw.bb
     76   ]
     77 
     78 sw.bb:
     79   unreachable
     80 
     81 sw.default:
     82   unreachable
     83 
     84 sw.epilog:
     85   ret void
     86 }
     87 
     88 declare float @llvm.fabs.f32(float) nounwind readnone
     89 
     90 ; This broke the old AMDIL cfg structurizer
     91 ; FUNC-LABEL: {{^}}loop_land_info_assert:
     92 ; SI:      v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
     93 ; SI:      s_and_b64 vcc, exec, [[CMP4]]
     94 ; SI-NEXT: s_cbranch_vccnz [[BR1:BB[0-9_]+]]
     95 ; SI-NEXT: s_branch [[BR2:BB[0-9_]+]]
     96 ; SI-NEXT: BB{{[0-9_]+}}:
     97 ; SI-NEXT: buffer_store_dword
     98 
     99 ; SI:      [[INFLOOP:BB[0-9]+_[0-9]+]]:
    100 
    101 ; SI:      [[BR1]]:
    102 ; SI-NEXT: s_and_b64 vcc, exec,
    103 ; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
    104 ; SI:      s_branch [[INFLOOP]]
    105 ; SI-NEXT: [[BR2]]:
    106 ; SI:      s_cbranch_vccz [[ENDPGM]]
    107 
    108 ; SI:      [[ENDPGM]]:
    109 ; SI-NEXT: s_endpgm
    110 define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
    111 entry:
    112   %cmp = icmp sgt i32 %c0, 0
    113   br label %while.cond.outer
    114 
    115 while.cond.outer:
    116   %tmp = load float, float addrspace(1)* undef
    117   br label %while.cond
    118 
    119 while.cond:
    120   %cmp1 = icmp slt i32 %c1, 4
    121   br i1 %cmp1, label %convex.exit, label %for.cond
    122 
    123 convex.exit:
    124   %or = or i1 %cmp, %cmp1
    125   br i1 %or, label %return, label %if.end
    126 
    127 if.end:
    128   %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
    129   %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
    130   br i1 %cmp2, label %if.else, label %while.cond.outer
    131 
    132 if.else:
    133   store volatile i32 3, i32 addrspace(1)* undef, align 4
    134   br label %while.cond
    135 
    136 for.cond:
    137   %cmp3 = icmp slt i32 %c3, 1000
    138   br i1 %cmp3, label %for.body, label %return
    139 
    140 for.body:
    141   br i1 %cmp3, label %self.loop, label %if.end.2
    142 
    143 if.end.2:
    144   %or.cond2 = or i1 %cmp3, %arg
    145   br i1 %or.cond2, label %return, label %for.cond
    146 
    147 self.loop:
    148  br label %self.loop
    149 
    150 return:
    151   ret void
    152 }
    153 
    154 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
    155 
    156 attributes #0 = { nounwind readnone }
    157