; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

declare float @fmaxf(float, float)
declare double @fmax(double, double)
declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80)

declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)


; CHECK-LABEL: @test_fmaxf
; SSE: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: andps %xmm1, %xmm3
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define float @test_fmaxf(float %x, float %y) {
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}

; CHECK-LABEL: @test_fmaxf_minsize
; CHECK: jmp fmaxf
define float @test_fmaxf_minsize(float %x, float %y) minsize {
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}

; FIXME: Doubles should be inlined similarly to floats.

; CHECK-LABEL: @test_fmax
; CHECK: jmp fmax
define double @test_fmax(double %x, double %y) {
  %z = call double @fmax(double %x, double %y) readnone
  ret double %z
}

; CHECK-LABEL: @test_fmaxl
; CHECK: callq fmaxl
define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
  %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

; CHECK-LABEL: @test_intrinsic_fmaxf
; SSE: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: andps %xmm1, %xmm3
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define float @test_intrinsic_fmaxf(float %x, float %y) {
  %z = call float @llvm.maxnum.f32(float %x, float %y) readnone
  ret float %z
}

; FIXME: Doubles should be inlined similarly to floats.

; CHECK-LABEL: @test_intrinsic_fmax
; CHECK: jmp fmax
define double @test_intrinsic_fmax(double %x, double %y) {
  %z = call double @llvm.maxnum.f64(double %x, double %y) readnone
  ret double %z
}

; CHECK-LABEL: @test_intrinsic_fmaxl
; CHECK: callq fmaxl
define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) {
  %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

; FIXME: This should not be doing 4 scalar ops on a 2 element vector.
; FIXME: This should use vector ops (maxps / cmpps).
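;
; Note on the expansions checked above and below: x86 maxss/maxps do not match
; maxnum semantics when an input is NaN (they return their second source
; operand), whereas llvm.maxnum must return the non-NaN operand, e.g.
; maxnum(NaN, 1.0) = maxnum(1.0, NaN) = 1.0. The lowering therefore commutes
; the operands so a NaN can only come through %x, then uses cmpunordss on %x
; and selects %y for the NaN case (and/andn/or under SSE, vblendvps under AVX).
; The vector tests below repeat this compare + max + select pattern once per
; extracted element.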

; CHECK-LABEL: @test_intrinsic_fmax_v2f32
; SSE: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: cmpunordss %xmm4, %xmm4
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: andps %xmm2, %xmm5
; SSE-NEXT: maxss %xmm3, %xmm2
; SSE-NEXT: andnps %xmm2, %xmm4
; SSE-NEXT: orps %xmm5, %xmm4
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1,2,3]
; SSE-NEXT: movaps %xmm5, %xmm3
; SSE-NEXT: cmpunordss %xmm3, %xmm3
; SSE-NEXT: movaps %xmm3, %xmm6
; SSE-NEXT: andps %xmm2, %xmm6
; SSE-NEXT: maxss %xmm5, %xmm2
; SSE-NEXT: andnps %xmm2, %xmm3
; SSE-NEXT: orps %xmm6, %xmm3
; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm4
; SSE-NEXT: andps %xmm1, %xmm4
; SSE-NEXT: movaps %xmm1, %xmm5
; SSE-NEXT: maxss %xmm0, %xmm5
; SSE-NEXT: andnps %xmm5, %xmm2
; SSE-NEXT: orps %xmm4, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd %xmm0, %xmm4
; SSE-NEXT: cmpunordss %xmm4, %xmm4
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: andps %xmm1, %xmm5
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm4
; SSE-NEXT: orps %xmm5, %xmm4
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX-NEXT: vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT: vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX-NEXT: vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT: vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm3
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm3, %xmm0
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; AVX-NEXT: retq
define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) {
  %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
  ret <2 x float> %z
}

; FIXME: This should use vector ops (maxps / cmpps).

; CHECK-LABEL: @test_intrinsic_fmax_v4f32
; SSE: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: cmpunordss %xmm4, %xmm4
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: andps %xmm2, %xmm5
; SSE-NEXT: maxss %xmm3, %xmm2
; SSE-NEXT: andnps %xmm2, %xmm4
; SSE-NEXT: orps %xmm5, %xmm4
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1,2,3]
; SSE-NEXT: movaps %xmm5, %xmm3
; SSE-NEXT: cmpunordss %xmm3, %xmm3
; SSE-NEXT: movaps %xmm3, %xmm6
; SSE-NEXT: andps %xmm2, %xmm6
; SSE-NEXT: maxss %xmm5, %xmm2
; SSE-NEXT: andnps %xmm2, %xmm3
; SSE-NEXT: orps %xmm6, %xmm3
; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm4
; SSE-NEXT: andps %xmm1, %xmm4
; SSE-NEXT: movaps %xmm1, %xmm5
; SSE-NEXT: maxss %xmm0, %xmm5
; SSE-NEXT: andnps %xmm5, %xmm2
; SSE-NEXT: orps %xmm4, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd %xmm0, %xmm4
; SSE-NEXT: cmpunordss %xmm4, %xmm4
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: andps %xmm1, %xmm5
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm4
; SSE-NEXT: orps %xmm5, %xmm4
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX-NEXT: vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT: vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX-NEXT: vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT: vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm3
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm3, %xmm0
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; AVX-NEXT: retq
define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) {
  %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
  ret <4 x float> %z
}

; FIXME: Vector of doubles should be inlined similarly to vector of floats.

; CHECK-LABEL: @test_intrinsic_fmax_v2f64
; CHECK: callq fmax
; CHECK: callq fmax
define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) {
  %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
  ret <2 x double> %z
}

; FIXME: Vector of doubles should be inlined similarly to vector of floats.

; CHECK-LABEL: @test_intrinsic_fmax_v4f64
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) {
  %z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}

; FIXME: Vector of doubles should be inlined similarly to vector of floats.

; CHECK-LABEL: @test_intrinsic_fmax_v8f64
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) {
  %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
  ret <8 x double> %z
}