Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
      3 
      4 ; SI-LABEL: {{^}}gs_const:
        ; Both kill conditions in this function are built purely from constants.
        ; The checks expect that no v_cmpx is emitted and that exec is set to 0
        ; with a plain s_mov_b64.
      5 ; SI-NOT: v_cmpx
      6 ; SI: s_mov_b64 exec, 0
      7 define amdgpu_gs void @gs_const() {
          ; 0 <= 3 holds, so %tmp1 is 1.0 and %c1 (1.0 >= 0.0) is true.
      8   %tmp = icmp ule i32 0, 3
      9   %tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
     10   %c1 = fcmp oge float %tmp1, 0.0
     11   call void @llvm.amdgcn.kill(i1 %c1)
          ; 3 <= 0 does not hold, so %tmp3 is -1.0 and %c2 (-1.0 >= 0.0) is false.
     12   %tmp2 = icmp ule i32 3, 0
     13   %tmp3 = select i1 %tmp2, float 1.000000e+00, float -1.000000e+00
     14   %c2 = fcmp oge float %tmp3, 0.0
     15   call void @llvm.amdgcn.kill(i1 %c2)
     16   ret void
     17 }
     18 
     19 ; SI-LABEL: {{^}}vcc_implicit_def:
        ; %tmp0 is computed before the kill and consumed after it. The checks
        ; require that compare to use the e64 form with an SGPR-pair result
        ; ([[CMP]]) and forbid the e32 form that writes vcc; the v_cmpx expected
        ; for the kill is checked with a vcc destination, and the following
        ; v_cndmask must read [[CMP]].
     20 ; SI-NOT: v_cmp_gt_f32_e32 vcc,
     21 ; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
     22 ; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
     23 ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
     24 define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
          ; Compare whose result must survive across the kill below.
     25   %tmp0 = fcmp olt float %arg13, 0.000000e+00
     26   %c1 = fcmp oge float %arg14, 0.0
     27   call void @llvm.amdgcn.kill(i1 %c1)
          ; Use of %tmp0 after the kill: selects 1.0 or 0.0 and exports it in all
          ; four value operands.
     28   %tmp1 = select i1 %tmp0, float 1.000000e+00, float 0.000000e+00
     29   call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
     30   ret void
     31 }
     32 
     33 ; SI-LABEL: {{^}}true:
        ; Kill with a constant-true condition. The checks require the two output
        ; lines right after the label to each contain "%bb." and the line after
        ; those to be s_endpgm, i.e. no instruction is expected for the kill.
     34 ; SI-NEXT: %bb.
     35 ; SI-NEXT: %bb.
     36 ; SI-NEXT: s_endpgm
     37 define amdgpu_gs void @true() {
     38   call void @llvm.amdgcn.kill(i1 true)
     39   ret void
     40 }
     41 
     42 ; SI-LABEL: {{^}}false:
        ; Kill with a constant-false condition. The checks expect exec to be set
        ; to 0 with s_mov_b64 and no v_cmpx before that.
     43 ; SI-NOT: v_cmpx
     44 ; SI: s_mov_b64 exec, 0
     45 define amdgpu_gs void @false() {
     46   call void @llvm.amdgcn.kill(i1 false)
     47   ret void
     48 }
     49 
     50 ; SI-LABEL: {{^}}and:
        ; Kill on a condition combined from two integer compares. The checks expect
        ; both compares, an s_or_b64 into s[0:1], and then exec masked with that
        ; result via s_and_b64.
        ; NOTE(review): despite the function name, the IR combines the compares
        ; with `or`; the name presumably refers to the s_and_b64 on exec -- confirm.
     51 ; SI: v_cmp_lt_i32
     52 ; SI: v_cmp_lt_i32
     53 ; SI: s_or_b64 s[0:1]
     54 ; SI: s_and_b64 exec, exec, s[0:1]
     55 define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
     56   %c1 = icmp slt i32 %a, %b
     57   %c2 = icmp slt i32 %c, %d
     58   %x = or i1 %c1, %c2
     59   call void @llvm.amdgcn.kill(i1 %x)
     60   ret void
     61 }
     62 
     63 ; SI-LABEL: {{^}}andn2:
        ; Kill on the negation of a combined condition. The checks expect the two
        ; compares, an s_xor_b64 into s[0:1], and a single s_andn2_b64 on exec
        ; using that (un-negated) xor result.
     64 ; SI: v_cmp_lt_i32
     65 ; SI: v_cmp_lt_i32
     66 ; SI: s_xor_b64 s[0:1]
     67 ; SI: s_andn2_b64 exec, exec, s[0:1]
     68 define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
     69   %c1 = icmp slt i32 %a, %b
     70   %c2 = icmp slt i32 %c, %d
     71   %x = xor i1 %c1, %c2
          ; xor of an i1 with 1 is the logical NOT of %x.
     72   %y = xor i1 %x, 1
     73   call void @llvm.amdgcn.kill(i1 %y)
     74   ret void
     75 }
     76 
     77 ; SI-LABEL: {{^}}oeq:
        ; Kill on `fcmp oeq %a, 0.0`: expected to become v_cmpx_eq_f32 with no
        ; s_and following it.
     78 ; SI: v_cmpx_eq_f32
     79 ; SI-NOT: s_and
     80 define amdgpu_gs void @oeq(float %a) {
     81   %c1 = fcmp oeq float %a, 0.0
     82   call void @llvm.amdgcn.kill(i1 %c1)
     83   ret void
     84 }
     85 
     86 ; SI-LABEL: {{^}}ogt:
        ; Kill on `fcmp ogt %a, 0.0`: expected to become v_cmpx_lt_f32 (note: the
        ; "lt" form) with no s_and following it.
        ; NOTE(review): the lt/gt swap presumably reflects the constant being
        ; emitted as the first source operand -- confirm.
     87 ; SI: v_cmpx_lt_f32
     88 ; SI-NOT: s_and
     89 define amdgpu_gs void @ogt(float %a) {
     90   %c1 = fcmp ogt float %a, 0.0
     91   call void @llvm.amdgcn.kill(i1 %c1)
     92   ret void
     93 }
     94 
     95 ; SI-LABEL: {{^}}oge:
        ; Kill on `fcmp oge %a, 0.0`: expected to become v_cmpx_le_f32 (note: the
        ; "le" form) with no s_and following it.
     96 ; SI: v_cmpx_le_f32
     97 ; SI-NOT: s_and
     98 define amdgpu_gs void @oge(float %a) {
     99   %c1 = fcmp oge float %a, 0.0
    100   call void @llvm.amdgcn.kill(i1 %c1)
    101   ret void
    102 }
    103 
    104 ; SI-LABEL: {{^}}olt:
        ; Kill on `fcmp olt %a, 0.0`: expected to become v_cmpx_gt_f32 (note: the
        ; "gt" form) with no s_and following it.
    105 ; SI: v_cmpx_gt_f32
    106 ; SI-NOT: s_and
    107 define amdgpu_gs void @olt(float %a) {
    108   %c1 = fcmp olt float %a, 0.0
    109   call void @llvm.amdgcn.kill(i1 %c1)
    110   ret void
    111 }
    112 
    113 ; SI-LABEL: {{^}}ole:
        ; Kill on `fcmp ole %a, 0.0`: expected to become v_cmpx_ge_f32 (note: the
        ; "ge" form) with no s_and following it.
    114 ; SI: v_cmpx_ge_f32
    115 ; SI-NOT: s_and
    116 define amdgpu_gs void @ole(float %a) {
    117   %c1 = fcmp ole float %a, 0.0
    118   call void @llvm.amdgcn.kill(i1 %c1)
    119   ret void
    120 }
    121 
    122 ; SI-LABEL: {{^}}one:
        ; Kill on `fcmp one %a, 0.0`: expected to become v_cmpx_lg_f32 with no
        ; s_and following it.
    123 ; SI: v_cmpx_lg_f32
    124 ; SI-NOT: s_and
    125 define amdgpu_gs void @one(float %a) {
    126   %c1 = fcmp one float %a, 0.0
    127   call void @llvm.amdgcn.kill(i1 %c1)
    128   ret void
    129 }
    130 
    131 ; SI-LABEL: {{^}}ord:
        ; Kill on `fcmp ord %a, 0.0`. Unlike the ordered/unordered relational
        ; tests in this file, the check only expects the plain (non-cmpx)
        ; v_cmp_o_f32 here.
    132 ; FIXME: This is absolutely unimportant, but we could use the cmpx variant here.
    133 ; SI: v_cmp_o_f32
    134 define amdgpu_gs void @ord(float %a) {
    135   %c1 = fcmp ord float %a, 0.0
    136   call void @llvm.amdgcn.kill(i1 %c1)
    137   ret void
    138 }
    139 
    140 ; SI-LABEL: {{^}}uno:
        ; Kill on `fcmp uno %a, 0.0`. Unlike the ordered/unordered relational
        ; tests in this file, the check only expects the plain (non-cmpx)
        ; v_cmp_u_f32 here.
    141 ; FIXME: This is absolutely unimportant, but we could use the cmpx variant here.
    142 ; SI: v_cmp_u_f32
    143 define amdgpu_gs void @uno(float %a) {
    144   %c1 = fcmp uno float %a, 0.0
    145   call void @llvm.amdgcn.kill(i1 %c1)
    146   ret void
    147 }
    148 
    149 ; SI-LABEL: {{^}}ueq:
        ; Kill on `fcmp ueq %a, 0.0`: expected to become v_cmpx_nlg_f32 with no
        ; s_and following it.
    150 ; SI: v_cmpx_nlg_f32
    151 ; SI-NOT: s_and
    152 define amdgpu_gs void @ueq(float %a) {
    153   %c1 = fcmp ueq float %a, 0.0
    154   call void @llvm.amdgcn.kill(i1 %c1)
    155   ret void
    156 }
    157 
    158 ; SI-LABEL: {{^}}ugt:
        ; Kill on `fcmp ugt %a, 0.0`: expected to become v_cmpx_nge_f32 with no
        ; s_and following it.
    159 ; SI: v_cmpx_nge_f32
    160 ; SI-NOT: s_and
    161 define amdgpu_gs void @ugt(float %a) {
    162   %c1 = fcmp ugt float %a, 0.0
    163   call void @llvm.amdgcn.kill(i1 %c1)
    164   ret void
    165 }
    166 
    167 ; SI-LABEL: {{^}}uge:
        ; Kill on `fcmp uge %a, -1.0`: expected to become the e32 form of
        ; v_cmpx_ngt_f32 writing vcc, with the constant -1.0 as the first source
        ; operand, and no s_and following it.
    168 ; SI: v_cmpx_ngt_f32_e32 vcc, -1.0
    169 ; SI-NOT: s_and
    170 define amdgpu_gs void @uge(float %a) {
    171   %c1 = fcmp uge float %a, -1.0
    172   call void @llvm.amdgcn.kill(i1 %c1)
    173   ret void
    174 }
    175 
    176 ; SI-LABEL: {{^}}ult:
        ; Kill on `fcmp ult %a, -2.0`: expected to become the e32 form of
        ; v_cmpx_nle_f32 writing vcc, with the constant -2.0 as the first source
        ; operand, and no s_and following it.
    177 ; SI: v_cmpx_nle_f32_e32 vcc, -2.0
    178 ; SI-NOT: s_and
    179 define amdgpu_gs void @ult(float %a) {
    180   %c1 = fcmp ult float %a, -2.0
    181   call void @llvm.amdgcn.kill(i1 %c1)
    182   ret void
    183 }
    184 
    185 ; SI-LABEL: {{^}}ule:
        ; Kill on `fcmp ule %a, 2.0`: expected to become the e32 form of
        ; v_cmpx_nlt_f32 writing vcc, with the constant 2.0 as the first source
        ; operand, and no s_and following it.
    186 ; SI: v_cmpx_nlt_f32_e32 vcc, 2.0
    187 ; SI-NOT: s_and
    188 define amdgpu_gs void @ule(float %a) {
    189   %c1 = fcmp ule float %a, 2.0
    190   call void @llvm.amdgcn.kill(i1 %c1)
    191   ret void
    192 }
    193 
    194 ; SI-LABEL: {{^}}une:
        ; Kill on `fcmp une %a, 0.0`: expected to become the e32 form of
        ; v_cmpx_neq_f32 writing vcc, with 0 as the first source operand, and no
        ; s_and following it.
    195 ; SI: v_cmpx_neq_f32_e32 vcc, 0
    196 ; SI-NOT: s_and
    197 define amdgpu_gs void @une(float %a) {
    198   %c1 = fcmp une float %a, 0.0
    199   call void @llvm.amdgcn.kill(i1 %c1)
    200   ret void
    201 }
    202 
    203 ; SI-LABEL: {{^}}neg_olt:
        ; Kill on the negation of `fcmp olt %a, 1.0`. The checks expect the
        ; negation to be absorbed into the compare: a single e32 v_cmpx_ngt_f32
        ; with 1.0 as the first source operand, and no s_and following it.
    204 ; SI: v_cmpx_ngt_f32_e32 vcc, 1.0
    205 ; SI-NOT: s_and
    206 define amdgpu_gs void @neg_olt(float %a) {
    207   %c1 = fcmp olt float %a, 1.0
          ; xor of an i1 with 1 is the logical NOT of %c1.
    208   %c2 = xor i1 %c1, 1
    209   call void @llvm.amdgcn.kill(i1 %c2)
    210   ret void
    211 }
    212 
    213 ; SI-LABEL: {{^}}fcmp_x2:
        ; Two chained float compares: the first picks -1.0 or 0.0, the second
        ; tests that value against 0.0 and feeds the kill. The checks currently
        ; expect both compares to remain (v_cmp_gt, v_cndmask, then v_cmpx_le).
    214 ; FIXME: LLVM should be able to combine these fcmp opcodes.
    215 ; SI: v_cmp_gt_f32
    216 ; SI: v_cndmask_b32
    217 ; SI: v_cmpx_le_f32
    218 define amdgpu_ps void @fcmp_x2(float %a) #0 {
    219   %ogt = fcmp nsz ogt float %a, 2.500000e-01
          ; %k is -1.0 when %a > 0.25 and 0.0 otherwise.
    220   %k = select i1 %ogt, float -1.000000e+00, float 0.000000e+00
    221   %c = fcmp nsz oge float %k, 0.000000e+00
    222   call void @llvm.amdgcn.kill(i1 %c) #1
    223   ret void
    224 }
    225 
    226 ; SI-LABEL: {{^}}wqm:
        ; Kill on the result of llvm.amdgcn.wqm.vote applied to a float compare.
        ; The checks expect a plain (non-cmpx) v_cmp_neq_f32 into vcc, an
        ; s_wqm_b64 of vcc into s[0:1], and exec masked with s[0:1] by s_and_b64.
    227 ; SI: v_cmp_neq_f32_e32 vcc, 0
    228 ; SI: s_wqm_b64 s[0:1], vcc
    229 ; SI: s_and_b64 exec, exec, s[0:1]
    230 define amdgpu_ps void @wqm(float %a) {
    231   %c1 = fcmp une float %a, 0.0
    232   %c2 = call i1 @llvm.amdgcn.wqm.vote(i1 %c1)
    233   call void @llvm.amdgcn.kill(i1 %c2)
    234   ret void
    235 }
    236 
    237 ; This checks that we use the 64-bit encoding when the operand is a SGPR.
        ; The argument is marked `inreg` and compared with 1.0; the check expects
        ; the e64 form of v_cmpx_ge_f32.
    238 ; SI-LABEL: {{^}}test_sgpr:
    239 ; SI: v_cmpx_ge_f32_e64
    240 define amdgpu_ps void @test_sgpr(float inreg %a) #0 {
    241   %c = fcmp ole float %a, 1.000000e+00
    242   call void @llvm.amdgcn.kill(i1 %c) #1
    243   ret void
    244 }
    245 
    246 ; SI-LABEL: {{^}}test_non_inline_imm_sgpr:
        ; Same shape as test_sgpr (an `inreg` argument with `fcmp ole`), but the
        ; constant is 1.5; here the e64 form of v_cmpx_ge_f32 must NOT appear.
        ; NOTE(review): per the test name, 1.5 is presumably not encodable as an
        ; inline immediate -- confirm. This function has only a negative check;
        ; no instruction is positively required.
    247 ; SI-NOT: v_cmpx_ge_f32_e64
    248 define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) #0 {
    249   %c = fcmp ole float %a, 1.500000e+00
    250   call void @llvm.amdgcn.kill(i1 %c) #1
    251   ret void
    252 }
    253 
    254 ; SI-LABEL: {{^}}test_scc_liveness:
        ; Kill inside a loop, where the kill condition is also the loop-exit
        ; condition. The checks expect a vector compare, exec masked with
        ; s_and_b64, and then a scalar compare feeding an scc-based branch.
        ; NOTE(review): per the test name this presumably guards scc liveness
        ; around the kill expansion -- confirm against the originating change.
    255 ; SI: v_cmp
    256 ; SI: s_and_b64 exec
    257 ; SI: s_cmp
    258 ; SI: s_cbranch_scc
    259 define amdgpu_ps void @test_scc_liveness() #0 {
    260 main_body:
    261   br label %loop3
    262 
    263 loop3:                                            ; preds = %loop3, %main_body
          ; Loop counter: starts at 0 and is incremented by 1 each iteration.
    264   %tmp = phi i32 [ 0, %main_body ], [ %tmp5, %loop3 ]
          ; %tmp1 is used both as the kill condition and as the exit condition.
    265   %tmp1 = icmp sgt i32 %tmp, 0
    266   call void @llvm.amdgcn.kill(i1 %tmp1) #1
    267   %tmp5 = add i32 %tmp, 1
    268   br i1 %tmp1, label %endloop15, label %loop3
    269 
    270 endloop15:                                        ; preds = %loop3
    271   ret void
    272 }
    273 
    274 declare void @llvm.amdgcn.kill(i1) #0
    275 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
    276 declare i1 @llvm.amdgcn.wqm.vote(i1)
    277 
        ; NOTE(review): several kill call sites in this file are tagged with
        ; attribute group #1, but only group #0 is defined in the visible part of
        ; the file -- confirm whether #1 is intentionally left undefined.
    278 attributes #0 = { nounwind }
    279