; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s

; Test fcmp pred (fneg x), c -> fcmp (swapped pred) x, -c combine.

; The product is negated only for the compare and is also stored un-negated.
; Expected output: a plain multiply, a compare of that product against -4.0,
; and a store of the same product register.
; GCN-LABEL: {{^}}multi_use_fneg_src:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @multi_use_fneg_src() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %b = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef

  %mul = fmul float %a, %b
  ; Negation is written as (-0.0 - %mul).
  %neg.mul = fsub float -0.0, %mul
  %cmp = fcmp oeq float %neg.mul, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ; Second use of the un-negated product.
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; The negated product feeds both the compare and a second multiply, and the
; un-negated product is the other operand of that multiply.
; Expected output: a plain multiply, a compare of that product against -4.0,
; and a second multiply that takes the negation as an operand modifier.
; The compare operand is the product, so it is matched through the MUL capture
; and not through the register of the first load.
; GCN-LABEL: {{^}}multi_foldable_use_fneg_src:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
; GCN: v_mul_f32_e64 [[USE1:v[0-9]+]], [[MUL]], -[[MUL]]
define amdgpu_kernel void @multi_foldable_use_fneg_src() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %b = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef

  %mul = fmul float %a, %b
  ; Negation is written as (-0.0 - %mul).
  %neg.mul = fsub float -0.0, %mul
  ; Extra user of the negated value besides the compare.
  %use1 = fmul float %mul, %neg.mul
  %cmp = fcmp oeq float %neg.mul, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y

  store volatile i32 %select, i32 addrspace(1)* undef
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; The negated product itself is stored as well as compared.
; Expected output: the negation is absorbed into the multiply as a source
; modifier on the second operand, the very next instruction compares that
; result against the original 4.0, and no xor appears before the store of the
; multiply result.
; GCN-LABEL: {{^}}multi_use_fneg:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 4.0, [[MUL]]
; GCN-NOT: xor
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @multi_use_fneg() #0 {
  %lhs = load volatile float, float addrspace(1)* undef
  %rhs = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef

  %prod = fmul float %lhs, %rhs
  ; Negation is written as (-0.0 - %prod).
  %neg.prod = fsub float -0.0, %prod
  %cond = fcmp oeq float %neg.prod, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ; Second use of the negated value.
  store volatile float %neg.prod, float addrspace(1)* undef
  ret void
}

; The negated product feeds the compare and a second multiply.
; Expected output: a plain multiply, a compare of that product against -4.0,
; a second multiply that takes the negation as a modifier on its first
; operand, and a store of that second multiply.
; GCN-LABEL: {{^}}multi_foldable_use_fneg:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL0]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MUL0]], [[MUL0]]
; GCN: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @multi_foldable_use_fneg() #0 {
  %lhs = load volatile float, float addrspace(1)* undef
  %rhs = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  ; This value is never read by any later instruction in the function.
  %unused = load volatile i32, i32 addrspace(1)* undef

  %prod = fmul float %lhs, %rhs
  ; Negation is written as (-0.0 - %prod).
  %neg.prod = fsub float -0.0, %prod
  %cond = fcmp oeq float %neg.prod, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  ; Extra user of the negated value besides the compare.
  %fold.use = fmul float %neg.prod, %prod
  store volatile i32 %result, i32 addrspace(1)* undef
  store volatile float %fold.use, float addrspace(1)* undef
  ret void
}

; Each kernel below negates one loaded float, compares it against 4.0 with a
; single predicate, and stores the selected integer. Every one expects a
; single compare whose constant operand is -4.0.

; oeq predicate, expected opcode v_cmp_eq.
; GCN-LABEL: {{^}}test_setcc_fneg_oeq_posk_f32:
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_oeq_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp oeq float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; ogt predicate, expected opcode v_cmp_gt.
; GCN-LABEL: {{^}}test_setcc_fneg_ogt_posk_f32:
; GCN: v_cmp_gt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ogt_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp ogt float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; oge predicate, expected opcode v_cmp_ge.
; GCN-LABEL: {{^}}test_setcc_fneg_oge_posk_f32:
; GCN: v_cmp_ge_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_oge_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp oge float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; olt predicate, expected opcode v_cmp_lt.
; GCN-LABEL: {{^}}test_setcc_fneg_olt_posk_f32:
; GCN: v_cmp_lt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_olt_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp olt float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; ole predicate, expected opcode v_cmp_le.
; GCN-LABEL: {{^}}test_setcc_fneg_ole_posk_f32:
; GCN: v_cmp_le_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ole_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp ole float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; one predicate, expected opcode v_cmp_lg.
; GCN-LABEL: {{^}}test_setcc_fneg_one_posk_f32:
; GCN: v_cmp_lg_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_one_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp one float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; ueq predicate, expected opcode v_cmp_nlg.
; GCN-LABEL: {{^}}test_setcc_fneg_ueq_posk_f32:
; GCN: v_cmp_nlg_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ueq_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp ueq float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; ugt predicate, expected opcode v_cmp_nle.
; GCN-LABEL: {{^}}test_setcc_fneg_ugt_posk_f32:
; GCN: v_cmp_nle_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ugt_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp ugt float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; uge predicate, expected opcode v_cmp_nlt.
; GCN-LABEL: {{^}}test_setcc_fneg_uge_posk_f32:
; GCN: v_cmp_nlt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_uge_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp uge float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; ult predicate, expected opcode v_cmp_nge.
; GCN-LABEL: {{^}}test_setcc_fneg_ult_posk_f32:
; GCN: v_cmp_nge_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ult_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp ult float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; ule predicate, expected opcode v_cmp_ngt.
; GCN-LABEL: {{^}}test_setcc_fneg_ule_posk_f32:
; GCN: v_cmp_ngt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ule_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp ule float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

; une predicate, expected opcode v_cmp_neq.
; GCN-LABEL: {{^}}test_setcc_fneg_une_posk_f32:
; GCN: v_cmp_neq_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_une_posk_f32() #0 {
  %val = load volatile float, float addrspace(1)* undef
  %if.true = load volatile i32, i32 addrspace(1)* undef
  %if.false = load volatile i32, i32 addrspace(1)* undef
  %neg.val = fsub float -0.0, %val
  %cond = fcmp une float %neg.val, 4.0
  %result = select i1 %cond, i32 %if.true, i32 %if.false
  store volatile i32 %result, i32 addrspace(1)* undef
  ret void
}

attributes #0 = { nounwind }