; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2  < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx  < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

declare float @fmaxf(float, float)
declare double @fmax(double, double)
declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80)

declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)

; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

define float @test_fmaxf(float %x, float %y) {
; SSE-LABEL: test_fmaxf:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    maxss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_fmaxf:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}
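; A minimal commented sketch (not part of the autogenerated checks) of what
; the cmpunord + blend sequence above computes, assuming maxnum/fmax
; semantics where a single NaN input returns the other operand:
;
;   %xnan = fcmp uno float %x, %x                  ; true iff %x is NaN
;   %ygtx = fcmp ogt float %y, %x                  ; maxss picks %x when unordered
;   %max  = select i1 %ygtx, float %y, float %x    ; handles a NaN %y
;   %res  = select i1 %xnan, float %y, float %max  ; a NaN %x falls back to %y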

define float @test_fmaxf_minsize(float %x, float %y) minsize {
; CHECK-LABEL: test_fmaxf_minsize:
; CHECK:       # %bb.0:
; CHECK-NEXT:    jmp fmaxf # TAILCALL
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}

; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

define double @test_fmax(double %x, double %y) {
; SSE-LABEL: test_fmax:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm3
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    maxsd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm1, %xmm2
; SSE-NEXT:    orpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_fmax:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call double @fmax(double %x, double %y) readnone
  ret double %z
}

define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
; CHECK-LABEL: test_fmaxl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fmaxl
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

define float @test_intrinsic_fmaxf(float %x, float %y) {
; SSE-LABEL: test_intrinsic_fmaxf:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    maxss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmaxf:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call float @llvm.maxnum.f32(float %x, float %y) readnone
  ret float %z
}

define double @test_intrinsic_fmax(double %x, double %y) {
; SSE-LABEL: test_intrinsic_fmax:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm3
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    maxsd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm1, %xmm2
; SSE-NEXT:    orpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call double @llvm.maxnum.f64(double %x, double %y) readnone
  ret double %z
}

define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) {
; CHECK-LABEL: test_intrinsic_fmaxl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fmaxl
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) {
; SSE-LABEL: test_intrinsic_fmax_v2f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    maxps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm0, %xmm0
; SSE-NEXT:    andps %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
  ret <2 x float> %z
}

define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: test_intrinsic_fmax_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    maxps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm0, %xmm0
; SSE-NEXT:    andps %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
  ret <4 x float> %z
}

define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE-LABEL: test_intrinsic_fmax_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    maxpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxpd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
  ret <2 x double> %z
}

define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) {
; SSE-LABEL: test_intrinsic_fmax_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    maxpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm2
; SSE-NEXT:    andnpd %xmm4, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    maxpd %xmm1, %xmm2
; SSE-NEXT:    cmpunordpd %xmm1, %xmm1
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    andnpd %xmm2, %xmm1
; SSE-NEXT:    orpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxpd %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX-NEXT:    retq
  %z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}

define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) {
; SSE-LABEL: test_intrinsic_fmax_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm4, %xmm8
; SSE-NEXT:    maxpd %xmm0, %xmm8
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm4
; SSE-NEXT:    andnpd %xmm8, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm5, %xmm4
; SSE-NEXT:    maxpd %xmm1, %xmm4
; SSE-NEXT:    cmpunordpd %xmm1, %xmm1
; SSE-NEXT:    andpd %xmm1, %xmm5
; SSE-NEXT:    andnpd %xmm4, %xmm1
; SSE-NEXT:    orpd %xmm5, %xmm1
; SSE-NEXT:    movapd %xmm6, %xmm4
; SSE-NEXT:    maxpd %xmm2, %xmm4
; SSE-NEXT:    cmpunordpd %xmm2, %xmm2
; SSE-NEXT:    andpd %xmm2, %xmm6
; SSE-NEXT:    andnpd %xmm4, %xmm2
; SSE-NEXT:    orpd %xmm6, %xmm2
; SSE-NEXT:    movapd %xmm7, %xmm4
; SSE-NEXT:    maxpd %xmm3, %xmm4
; SSE-NEXT:    cmpunordpd %xmm3, %xmm3
; SSE-NEXT:    andpd %xmm3, %xmm7
; SSE-NEXT:    andnpd %xmm4, %xmm3
; SSE-NEXT:    orpd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxpd %ymm0, %ymm2, %ymm4
; AVX-NEXT:    vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vblendvpd %ymm0, %ymm2, %ymm4, %ymm0
; AVX-NEXT:    vmaxpd %ymm1, %ymm3, %ymm2
; AVX-NEXT:    vcmpunordpd %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vblendvpd %ymm1, %ymm3, %ymm2, %ymm1
; AVX-NEXT:    retq
  %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
  ret <8 x double> %z
}

; The IR-level fast-math flags (FMF) propagate to the SelectionDAG node. With
; nnan, the NaN inputs that the blend would handle are excluded, so there's no
; need to blend.

define double @maxnum_intrinsic_nnan_fmf_f64(double %a, double %b) {
; SSE-LABEL: maxnum_intrinsic_nnan_fmf_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    maxsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_fmf_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan double @llvm.maxnum.f64(double %a, double %b)
  ret double %r
}
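; A commented sketch (hypothetical IR, not checked output) of why nnan
; licenses the bare maxsd: the select below is exactly MAXSD's ordered
; behavior, and nnan rules out the unordered case, where MAXSD would blindly
; return its src operand regardless of which input was NaN:
;
;   %agtb = fcmp nnan ogt double %a, %b
;   %r    = select i1 %agtb, double %a, double %b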

; Make sure vectors work too.

define <4 x float> @maxnum_intrinsic_nnan_fmf_v4f32(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: maxnum_intrinsic_nnan_fmf_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    maxps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_fmf_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}

; Currently (though this may be treated as legacy someday), a function-level
; attribute should also enable the fold.

define float @maxnum_intrinsic_nnan_attr_f32(float %a, float %b) #0 {
; SSE-LABEL: maxnum_intrinsic_nnan_attr_f32:
; SSE:       # %bb.0:
; SSE-NEXT:    maxss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_attr_f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call float @llvm.maxnum.f32(float %a, float %b)
  ret float %r
}
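; A commented sketch (hypothetical function name; the attribute string matches
; the #0 set defined at the bottom of this file) of requesting the fold
; per-function rather than per-call:
;
;   define float @sketch(float %a, float %b) "no-nans-fp-math"="true" {
;     %r = call float @llvm.maxnum.f32(float %a, float %b)
;     ret float %r            ; lowers to a bare (v)maxss, as checked above
;   }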

; Make sure vectors work too.

define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double> %b) #0 {
; SSE-LABEL: maxnum_intrinsic_nnan_attr_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    maxpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_attr_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b)
  ret <2 x double> %r
}

attributes #0 = { "no-nans-fp-math"="true" }