Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
      2 
      3 ; Test that fcmp pred (fneg x), c is combined into fcmp (swapped pred) x, -c.
      4 
      5 ; GCN-LABEL: {{^}}multi_use_fneg_src:
      6 ; GCN: buffer_load_dword [[A:v[0-9]+]]
      7 ; GCN: buffer_load_dword [[B:v[0-9]+]]
      8 ; GCN: buffer_load_dword [[C:v[0-9]+]]
      9 
     10 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
     11 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
     12 ; GCN: buffer_store_dword [[MUL]]
     13 define amdgpu_kernel void @multi_use_fneg_src() #0 { ; the fneg operand %mul also has a plain, un-negated use
     14   %a = load volatile float, float addrspace(1)* undef
     15   %b = load volatile float, float addrspace(1)* undef
     16   %x = load volatile i32, i32 addrspace(1)* undef
     17   %y = load volatile i32, i32 addrspace(1)* undef
     18 
     19   %mul = fmul float %a, %b
     20   %neg.mul = fsub float -0.0, %mul ; negation of %mul, written as -0.0 - %mul
     21   %cmp = fcmp oeq float %neg.mul, 4.0 ; the expected output compares the un-negated product with -4.0 instead
     22   %select = select i1 %cmp, i32 %x, i32 %y
     23   store volatile i32 %select, i32 addrspace(1)* undef
     24   store volatile float %mul, float addrspace(1)* undef ; second use of %mul, expected to store the same register the compare reads
     25   ret void
     26 }
     27 
     28 ; GCN-LABEL: {{^}}multi_foldable_use_fneg_src:
     29 ; GCN: buffer_load_dword [[A:v[0-9]+]]
     30 ; GCN: buffer_load_dword [[B:v[0-9]+]]
     31 ; GCN: buffer_load_dword [[C:v[0-9]+]]
     32 
     33 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
     34 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
     35 ; GCN: v_mul_f32_e64 [[USE1:v[0-9]+]], [[MUL]], -[[MUL]]
     36 define amdgpu_kernel void @multi_foldable_use_fneg_src() #0 { ; %mul is used plain and negated by an fmul, and negated by the compare
     37   %a = load volatile float, float addrspace(1)* undef
     38   %b = load volatile float, float addrspace(1)* undef
     39   %x = load volatile i32, i32 addrspace(1)* undef
     40   %y = load volatile i32, i32 addrspace(1)* undef
     41 
     42   %mul = fmul float %a, %b
     43   %neg.mul = fsub float -0.0, %mul ; negation of %mul, written as -0.0 - %mul
     44   %use1 = fmul float %mul, %neg.mul ; expected as one multiply of the product by its own negated operand
     45   %cmp = fcmp oeq float %neg.mul, 4.0 ; expected as a compare of the un-negated product (not the first load) with -4.0
     46   %select = select i1 %cmp, i32 %x, i32 %y
     47 
     48   store volatile i32 %select, i32 addrspace(1)* undef
     49   store volatile float %use1, float addrspace(1)* undef
     50   ret void
     51 }
     52 
     53 ; GCN-LABEL: {{^}}multi_use_fneg:
     54 ; GCN: buffer_load_dword [[A:v[0-9]+]]
     55 ; GCN: buffer_load_dword [[B:v[0-9]+]]
     56 ; GCN: buffer_load_dword [[C:v[0-9]+]]
     57 
     58 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
     59 ; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 4.0, [[MUL]]
     60 ; GCN-NOT: xor
     61 ; GCN: buffer_store_dword [[MUL]]
     62 define amdgpu_kernel void @multi_use_fneg() #0 { ; the fneg result %neg.mul itself has two uses, the compare and a store
     63   %a = load volatile float, float addrspace(1)* undef
     64   %b = load volatile float, float addrspace(1)* undef
     65   %x = load volatile i32, i32 addrspace(1)* undef
     66   %y = load volatile i32, i32 addrspace(1)* undef
     67 
     68   %mul = fmul float %a, %b ; the expected multiply already carries the negation on its second operand
     69   %neg.mul = fsub float -0.0, %mul ; negation of %mul, written as -0.0 - %mul
     70   %cmp = fcmp oeq float %neg.mul, 4.0 ; expected to read the already-negated product, so the constant stays +4.0
     71   %select = select i1 %cmp, i32 %x, i32 %y
     72   store volatile i32 %select, i32 addrspace(1)* undef
     73   store volatile float %neg.mul, float addrspace(1)* undef ; stores the negated value; no xor may appear between the compare and this store
     74   ret void
     75 }
     76 
     77 ; GCN-LABEL: {{^}}multi_foldable_use_fneg:
     78 ; GCN: buffer_load_dword [[A:v[0-9]+]]
     79 ; GCN: buffer_load_dword [[B:v[0-9]+]]
     80 
     81 ; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
     82 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL0]]
     83 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MUL0]], [[MUL0]]
     84 ; GCN: buffer_store_dword [[MUL1]]
     85 define amdgpu_kernel void @multi_foldable_use_fneg() #0 { ; %neg.mul feeds the compare and an fmul that can take the negation on its operand
     86   %a = load volatile float, float addrspace(1)* undef
     87   %b = load volatile float, float addrspace(1)* undef
     88   %x = load volatile i32, i32 addrspace(1)* undef
     89   %y = load volatile i32, i32 addrspace(1)* undef
     90   %z = load volatile i32, i32 addrspace(1)* undef ; %z has no uses in this function
     91 
     92   %mul = fmul float %a, %b
     93   %neg.mul = fsub float -0.0, %mul ; negation of %mul, written as -0.0 - %mul
     94   %cmp = fcmp oeq float %neg.mul, 4.0 ; expected as a compare of the un-negated product with -4.0
     95   %select = select i1 %cmp, i32 %x, i32 %y
     96   %use1 = fmul float %neg.mul, %mul ; expected as one multiply whose first operand is the negated product
     97   store volatile i32 %select, i32 addrspace(1)* undef
     98   store volatile float %use1, float addrspace(1)* undef ; expected to store the result of the second multiply
     99   ret void
    100 }
    101 
    102 ; GCN-LABEL: {{^}}test_setcc_fneg_oeq_posk_f32:
    103 ; GCN: v_cmp_eq_f32_e32 vcc, -4.0, v{{[0-9]+}}
    104 define amdgpu_kernel void @test_setcc_fneg_oeq_posk_f32() #0 { ; oeq compare of (fneg %a) against +4.0
    105   %a = load volatile float, float addrspace(1)* undef
    106   %x = load volatile i32, i32 addrspace(1)* undef
    107   %y = load volatile i32, i32 addrspace(1)* undef
    108   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    109   %cmp = fcmp oeq float %neg.a, 4.0 ; -%a == 4.0 exactly when -4.0 == %a, so the check wants v_cmp_eq with -4.0 as first operand
    110   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    111   store volatile i32 %select, i32 addrspace(1)* undef
    112   ret void
    113 }
    114 
    115 ; GCN-LABEL: {{^}}test_setcc_fneg_ogt_posk_f32:
    116 ; GCN: v_cmp_gt_f32_e32 vcc, -4.0, v{{[0-9]+}}
    117 define amdgpu_kernel void @test_setcc_fneg_ogt_posk_f32() #0 { ; ogt compare of (fneg %a) against +4.0
    118   %a = load volatile float, float addrspace(1)* undef
    119   %x = load volatile i32, i32 addrspace(1)* undef
    120   %y = load volatile i32, i32 addrspace(1)* undef
    121   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    122   %cmp = fcmp ogt float %neg.a, 4.0 ; -%a > 4.0 exactly when -4.0 > %a, so the check wants v_cmp_gt with -4.0 as first operand
    123   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    124   store volatile i32 %select, i32 addrspace(1)* undef
    125   ret void
    126 }
    127 
    128 ; GCN-LABEL: {{^}}test_setcc_fneg_oge_posk_f32:
    129 ; GCN: v_cmp_ge_f32_e32 vcc, -4.0, v{{[0-9]+}}
    130 define amdgpu_kernel void @test_setcc_fneg_oge_posk_f32() #0 { ; oge compare of (fneg %a) against +4.0
    131   %a = load volatile float, float addrspace(1)* undef
    132   %x = load volatile i32, i32 addrspace(1)* undef
    133   %y = load volatile i32, i32 addrspace(1)* undef
    134   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    135   %cmp = fcmp oge float %neg.a, 4.0 ; -%a >= 4.0 exactly when -4.0 >= %a, so the check wants v_cmp_ge with -4.0 as first operand
    136   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    137   store volatile i32 %select, i32 addrspace(1)* undef
    138   ret void
    139 }
    140 
    141 ; GCN-LABEL: {{^}}test_setcc_fneg_olt_posk_f32:
    142 ; GCN: v_cmp_lt_f32_e32 vcc, -4.0, v{{[0-9]+}}
    143 define amdgpu_kernel void @test_setcc_fneg_olt_posk_f32() #0 { ; olt compare of (fneg %a) against +4.0
    144   %a = load volatile float, float addrspace(1)* undef
    145   %x = load volatile i32, i32 addrspace(1)* undef
    146   %y = load volatile i32, i32 addrspace(1)* undef
    147   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    148   %cmp = fcmp olt float %neg.a, 4.0 ; -%a < 4.0 exactly when -4.0 < %a, so the check wants v_cmp_lt with -4.0 as first operand
    149   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    150   store volatile i32 %select, i32 addrspace(1)* undef
    151   ret void
    152 }
    153 
    154 ; GCN-LABEL: {{^}}test_setcc_fneg_ole_posk_f32:
    155 ; GCN: v_cmp_le_f32_e32 vcc, -4.0, v{{[0-9]+}}
    156 define amdgpu_kernel void @test_setcc_fneg_ole_posk_f32() #0 { ; ole compare of (fneg %a) against +4.0
    157   %a = load volatile float, float addrspace(1)* undef
    158   %x = load volatile i32, i32 addrspace(1)* undef
    159   %y = load volatile i32, i32 addrspace(1)* undef
    160   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    161   %cmp = fcmp ole float %neg.a, 4.0 ; -%a <= 4.0 exactly when -4.0 <= %a, so the check wants v_cmp_le with -4.0 as first operand
    162   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    163   store volatile i32 %select, i32 addrspace(1)* undef
    164   ret void
    165 }
    166 
    167 ; GCN-LABEL: {{^}}test_setcc_fneg_one_posk_f32:
    168 ; GCN: v_cmp_lg_f32_e32 vcc, -4.0, v{{[0-9]+}}
    169 define amdgpu_kernel void @test_setcc_fneg_one_posk_f32() #0 { ; one (ordered, not equal) compare of (fneg %a) against +4.0
    170   %a = load volatile float, float addrspace(1)* undef
    171   %x = load volatile i32, i32 addrspace(1)* undef
    172   %y = load volatile i32, i32 addrspace(1)* undef
    173   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    174   %cmp = fcmp one float %neg.a, 4.0 ; the predicate is symmetric, so only the constant changes sign; the check wants v_cmp_lg with -4.0 first
    175   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    176   store volatile i32 %select, i32 addrspace(1)* undef
    177   ret void
    178 }
    179 
    180 ; GCN-LABEL: {{^}}test_setcc_fneg_ueq_posk_f32:
    181 ; GCN: v_cmp_nlg_f32_e32 vcc, -4.0, v{{[0-9]+}}
    182 define amdgpu_kernel void @test_setcc_fneg_ueq_posk_f32() #0 { ; ueq (unordered or equal) compare of (fneg %a) against +4.0
    183   %a = load volatile float, float addrspace(1)* undef
    184   %x = load volatile i32, i32 addrspace(1)* undef
    185   %y = load volatile i32, i32 addrspace(1)* undef
    186   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    187   %cmp = fcmp ueq float %neg.a, 4.0 ; the predicate is symmetric, so only the constant changes sign; the check wants v_cmp_nlg with -4.0 first
    188   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    189   store volatile i32 %select, i32 addrspace(1)* undef
    190   ret void
    191 }
    192 
    193 ; GCN-LABEL: {{^}}test_setcc_fneg_ugt_posk_f32:
    194 ; GCN: v_cmp_nle_f32_e32 vcc, -4.0, v{{[0-9]+}}
    195 define amdgpu_kernel void @test_setcc_fneg_ugt_posk_f32() #0 { ; ugt (unordered or greater) compare of (fneg %a) against +4.0
    196   %a = load volatile float, float addrspace(1)* undef
    197   %x = load volatile i32, i32 addrspace(1)* undef
    198   %y = load volatile i32, i32 addrspace(1)* undef
    199   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    200   %cmp = fcmp ugt float %neg.a, 4.0 ; the check wants v_cmp_nle with -4.0 first (presumably "not less-or-equal", which also accepts NaN; confirm in the ISA docs)
    201   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    202   store volatile i32 %select, i32 addrspace(1)* undef
    203   ret void
    204 }
    205 
    206 ; GCN-LABEL: {{^}}test_setcc_fneg_uge_posk_f32:
    207 ; GCN: v_cmp_nlt_f32_e32 vcc, -4.0, v{{[0-9]+}}
    208 define amdgpu_kernel void @test_setcc_fneg_uge_posk_f32() #0 { ; uge (unordered or greater-or-equal) compare of (fneg %a) against +4.0
    209   %a = load volatile float, float addrspace(1)* undef
    210   %x = load volatile i32, i32 addrspace(1)* undef
    211   %y = load volatile i32, i32 addrspace(1)* undef
    212   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    213   %cmp = fcmp uge float %neg.a, 4.0 ; the check wants v_cmp_nlt with -4.0 first (presumably "not less-than", which also accepts NaN; confirm in the ISA docs)
    214   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    215   store volatile i32 %select, i32 addrspace(1)* undef
    216   ret void
    217 }
    218 
    219 ; GCN-LABEL: {{^}}test_setcc_fneg_ult_posk_f32:
    220 ; GCN: v_cmp_nge_f32_e32 vcc, -4.0, v{{[0-9]+}}
    221 define amdgpu_kernel void @test_setcc_fneg_ult_posk_f32() #0 { ; ult (unordered or less) compare of (fneg %a) against +4.0
    222   %a = load volatile float, float addrspace(1)* undef
    223   %x = load volatile i32, i32 addrspace(1)* undef
    224   %y = load volatile i32, i32 addrspace(1)* undef
    225   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    226   %cmp = fcmp ult float %neg.a, 4.0 ; the check wants v_cmp_nge with -4.0 first (presumably "not greater-or-equal", which also accepts NaN; confirm in the ISA docs)
    227   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    228   store volatile i32 %select, i32 addrspace(1)* undef
    229   ret void
    230 }
    231 
    232 ; GCN-LABEL: {{^}}test_setcc_fneg_ule_posk_f32:
    233 ; GCN: v_cmp_ngt_f32_e32 vcc, -4.0, v{{[0-9]+}}
    234 define amdgpu_kernel void @test_setcc_fneg_ule_posk_f32() #0 { ; ule (unordered or less-or-equal) compare of (fneg %a) against +4.0
    235   %a = load volatile float, float addrspace(1)* undef
    236   %x = load volatile i32, i32 addrspace(1)* undef
    237   %y = load volatile i32, i32 addrspace(1)* undef
    238   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    239   %cmp = fcmp ule float %neg.a, 4.0 ; the check wants v_cmp_ngt with -4.0 first (presumably "not greater-than", which also accepts NaN; confirm in the ISA docs)
    240   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    241   store volatile i32 %select, i32 addrspace(1)* undef
    242   ret void
    243 }
    244 
    245 ; GCN-LABEL: {{^}}test_setcc_fneg_une_posk_f32:
    246 ; GCN: v_cmp_neq_f32_e32 vcc, -4.0, v{{[0-9]+}}
    247 define amdgpu_kernel void @test_setcc_fneg_une_posk_f32() #0 { ; une (unordered or not equal) compare of (fneg %a) against +4.0
    248   %a = load volatile float, float addrspace(1)* undef
    249   %x = load volatile i32, i32 addrspace(1)* undef
    250   %y = load volatile i32, i32 addrspace(1)* undef
    251   %neg.a = fsub float -0.0, %a ; negation of %a, written as -0.0 - %a
    252   %cmp = fcmp une float %neg.a, 4.0 ; the predicate is symmetric, so only the constant changes sign; the check wants v_cmp_neq with -4.0 first
    253   %select = select i1 %cmp, i32 %x, i32 %y ; keeps the compare result live through the volatile store below
    254   store volatile i32 %select, i32 addrspace(1)* undef
    255   ret void
    256 }
    257 
    258 attributes #0 = { nounwind } ; attribute group #0, attached to each kernel definition above; marks them nounwind
    259