Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
      2 
      3 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone  ; Intrinsic called once at the top of every test function below; takes no arguments and returns an i32.
      4 
      5 ; SI-LABEL: @test_if
      6 ; Make sure the i1 values created by the cfg structurizer pass are
      7 ; moved using VALU instructions
      8 ; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
      9 ; SI: v_mov_b32_e32 v{{[0-9]}}, -1
     10 define void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
     11 entry:  ; Writes 0, 1, 2 or 3 to element %b of %dst, selected by the workitem.id.x value; %src is never read.
     12   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
     13   switch i32 %tid, label %default [  ; Dispatch on %tid: 0 and 1 have dedicated blocks, every other value goes to %default.
     14     i32 0, label %case0
     15     i32 1, label %case1
     16   ]
     17 ; %tid == 0: store the constant 0.
     18 case0:
     19   %arrayidx1 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
     20   store i32 0, i32 addrspace(1)* %arrayidx1, align 4
     21   br label %end
     22 ; %tid == 1: store the constant 1 to the same element.
     23 case1:
     24   %arrayidx5 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
     25   store i32 1, i32 addrspace(1)* %arrayidx5, align 4
     26   br label %end
     27 ; Any other %tid: a second, two-way branch separates %tid == 2 from the rest.
     28 default:
     29   %cmp8 = icmp eq i32 %tid, 2
     30   %arrayidx10 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b  ; Computed before the branch and shared by both %if and %else.
     31   br i1 %cmp8, label %if, label %else
     32 ; %tid == 2: store the constant 2.
     33 if:
     34   store i32 2, i32 addrspace(1)* %arrayidx10, align 4
     35   br label %end
     36 ; %tid is none of 0, 1, 2: store the constant 3.
     37 else:
     38   store i32 3, i32 addrspace(1)* %arrayidx10, align 4
     39   br label %end
     40 ; Common exit reached from all four storing blocks.
     41 end:
     42   ret void
     43 }
     44 
     45 ; SI-LABEL: @simple_test_v_if
     46 ; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
     47 ; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
     48 ; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
     49 
     50 ; SI: ; BB#1
     51 ; SI: buffer_store_dword
     52 ; SI: s_endpgm
     53 
     54 ; SI: BB1_2:
     55 ; SI: s_or_b64 exec, exec, [[BR_SREG]]
     56 ; SI: s_endpgm
     57 define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {  ; Stores 999 to %dst[%tid] when %tid is non-zero, otherwise returns without storing; %src is unused.
     58   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
     59   %is.0 = icmp ne i32 %tid, 0  ; Despite its name, %is.0 is true when %tid is NOT zero.
     60   br i1 %is.0, label %store, label %exit
     61 ; Taken only when %tid != 0.
     62 store:
     63   %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid
     64   store i32 999, i32 addrspace(1)* %gep
     65   ret void  ; This block returns directly instead of branching to %exit.
     66 ; Taken when %tid == 0; nothing is written.
     67 exit:
     68   ret void
     69 }
     70 
     71 ; SI-LABEL: @simple_test_v_loop
     72 ; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
     73 ; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
     74 ; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
     75 ; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
     76 
     77 ; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
     78 
     79 ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
     80 ; SI: buffer_load_dword
     81 ; SI-DAG: buffer_store_dword
     82 ; SI-DAG: v_cmp_eq_i32_e32 vcc,
     83 ; SI-DAG: s_and_b64 vcc, exec, vcc
     84 ; SI: s_cbranch_vccz [[LABEL_LOOP]]
     85 ; SI: [[LABEL_EXIT]]:
     86 ; SI: s_endpgm
     87 
     88 define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
     89 entry:  ; When %tid is non-zero, runs a loop that stores a value loaded from %src into %dst[%i] for %i starting at %tid and stopping once %i + 1 equals %tid + 64.
     90   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
     91   %is.0 = icmp ne i32 %tid, 0  ; True when %tid is NOT zero; a zero %tid skips the loop entirely.
     92   %limit = add i32 %tid, 64  ; Loop bound, computed once outside the loop.
     93   br i1 %is.0, label %loop, label %exit
     94 ; Loop body; %i is the induction variable, seeded with %tid on entry.
     95 loop:
     96   %i = phi i32 [%tid, %entry], [%i.inc, %loop]
     97   %gep.src = getelementptr i32, i32 addrspace(1)* %src, i32 %i  ; NOTE(review): %gep.src has no users; the load below reads %src itself. Confirm this is intentional before changing it, since the expected output was written against this IR.
     98   %gep.dst = getelementptr i32, i32 addrspace(1)* %dst, i32 %i
     99   %load = load i32, i32 addrspace(1)* %src  ; Always loads from the base pointer %src, not from an address that depends on %i.
    100   store i32 %load, i32 addrspace(1)* %gep.dst
    101   %i.inc = add nsw i32 %i, 1
    102   %cmp = icmp eq i32 %limit, %i.inc  ; Exit condition is equality with %limit, not a less-than comparison.
    103   br i1 %cmp, label %exit, label %loop
    104 ; Reached either directly from %entry (%tid == 0) or when the loop finishes.
    105 exit:
    106   ret void
    107 }
    108 
    109 ; SI-LABEL: @multi_vcond_loop
    110 
    111 ; Load loop limit from buffer
    112 ; Branch to exit if uniformly not taken
    113 ; SI: ; BB#0:
    114 ; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
    115 ; SI: v_cmp_lt_i32_e32 vcc
    116 ; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
    117 ; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
    118 ; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
    119 
    120 ; Initialize inner condition to false
    121 ; SI: ; BB#1:
    122 ; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}
    123 ; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]
    124 
    125 ; Clear exec bits for workitems that load -1s
    126 ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
    127 ; SI: buffer_load_dword [[B:v[0-9]+]]
    128 ; SI: buffer_load_dword [[A:v[0-9]+]]
    129 ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
    130 ; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
    131 ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
    132 ; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
    133 ; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
    134 ; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
    135 
    136 ; SI: BB#3:
    137 ; SI: buffer_store_dword
    138 ; SI: v_cmp_ge_i64_e32 [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
    139 ; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]
    140 
    141 ; SI: [[LABEL_FLOW]]:
    142 ; SI: s_or_b64 exec, exec, [[ORNEG2]]
    143 ; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]]
    144 ; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
    145 ; SI: s_cbranch_execnz [[LABEL_LOOP]]
    146 
    147 ; SI: BB#5
    148 ; SI: s_or_b64 exec, exec, [[COND_STATE]]
    149 
    150 ; SI: [[LABEL_EXIT]]:
    151 ; SI-NOT: [[COND_STATE]]
    152 ; SI: s_endpgm
    153 
    154 define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
    155 bb:  ; Loop with two exit conditions: it adds elements of %arg1 and %arg2 into %arg, stopping when either loaded value is -1 or when the counter reaches the bound loaded from %arg3.
    156   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
    157   %tmp4 = sext i32 %tmp to i64  ; Work-item id widened to i64; used as the index into %arg3 and as the base offset inside the loop.
    158   %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
    159   %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4  ; Loop bound for this work-item, read from %arg3 at the work-item id.
    160   %tmp7 = icmp sgt i32 %tmp6, 0  ; The loop is entered only when the loaded bound is strictly positive.
    161   %tmp8 = sext i32 %tmp6 to i64  ; Same bound widened to i64 for the comparison in %bb20.
    162   br i1 %tmp7, label %bb10, label %bb26
    163 ; Loop header: loads one element from each input and tests both against -1.
    164 bb10:                                             ; preds = %bb, %bb20
    165   %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]  ; Iteration counter, 0 on first entry.
    166   %tmp12 = add nsw i64 %tmp11, %tmp4  ; Element index = counter + work-item id; shared by both loads and the store.
    167   %tmp13 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp12
    168   %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4
    169   %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp12
    170   %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
    171   %tmp17 = icmp ne i32 %tmp14, -1
    172   %tmp18 = icmp ne i32 %tmp16, -1
    173   %tmp19 = and i1 %tmp17, %tmp18  ; Continue only if neither loaded value is -1.
    174   br i1 %tmp19, label %bb20, label %bb26
    175 ; Loop latch: stores the sum, advances the counter and tests it against the bound.
    176 bb20:                                             ; preds = %bb10
    177   %tmp21 = add nsw i32 %tmp16, %tmp14
    178   %tmp22 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp12
    179   store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4
    180   %tmp23 = add nuw nsw i64 %tmp11, 1
    181   %tmp24 = icmp slt i64 %tmp23, %tmp8  ; Keep looping while the incremented counter is below the bound.
    182   br i1 %tmp24, label %bb10, label %bb26
    183 ; Single exit block for the skipped-loop case and for both loop exits.
    184 bb26:                                             ; preds = %bb10, %bb20, %bb
    185   ret void
    186 }
    187 
    188 attributes #0 = { nounwind readnone }  ; Referenced by the workitem.id.x call site in @multi_vcond_loop.
    189 attributes #1 = { nounwind }  ; Attached to each of the function definitions above.
    190