; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX

; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
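; As a sketch (comment only, not a FileCheck directive), each variant below is
; rewritten from the serial chain
;   ((x0 + x1) + x2) + x3
; into the shallower form
;   (x0 + x1) + (x2 + x3)
; so the first two adds can execute in parallel.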

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds1:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds2:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds3:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds4:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
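; As a comment-only sketch of the expected shape, the eight-operand chain
;   ((((((x0 + x1) + x2) + x3) + x4) + x5) + x6) + x7
; becomes the partially rebalanced
;   (((x0 + x1) + (x2 + x3)) + ((x4 + x5) + x6)) + x7
; rather than a fully balanced tree.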

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    addss %xmm5, %xmm4
; SSE-NEXT:    addss %xmm6, %xmm4
; SSE-NEXT:    addss %xmm4, %xmm0
; SSE-NEXT:    addss %xmm7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds5:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
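; In expression form (comment only):
;   x3 + (x2 + (x0 / x1))  -->  (x0 / x1) + (x2 + x3)
; so the long-latency divide feeds the final add instead of an earlier one.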

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds6:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.
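; This is the same rewrite as reassociate_adds6 with fmul in place of fadd
; (comment only): x3 * (x2 * (x0 / x1))  -->  (x0 / x1) * (x2 * x3).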

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls1:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm3, %xmm2
; SSE-NEXT:    addsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    mulsd %xmm3, %xmm2
; SSE-NEXT:    mulsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulps %xmm1, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm2
; SSE-NEXT:    addps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    addpd %xmm3, %xmm2
; SSE-NEXT:    addpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    mulps %xmm3, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    mulpd %xmm3, %xmm2
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that AVX 256-bit vector single-precision adds are reassociated.

define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_adds_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <8 x float> %x0, %x1
  %t1 = fadd <8 x float> %x2, %t0
  %t2 = fadd <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision adds are reassociated.

define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_adds_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x double> %x0, %x1
  %t1 = fadd <4 x double> %x2, %t0
  %t2 = fadd <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX 256-bit vector single-precision multiplies are reassociated.

define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_muls_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %t1 = fmul <8 x float> %x2, %t0
  %t2 = fmul <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision multiplies are reassociated.

define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_muls_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %t1 = fmul <4 x double> %x2, %t0
  %t2 = fmul <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
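; Note (comment only): the IR below expresses min/max as fcmp olt/ogt feeding a
; select; the backend matches that pattern to minss/maxss (and the sd/ps/pd forms
; in the later tests) and then reassociates the chain just like the adds and
; multiplies above.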

define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    minss %xmm3, %xmm2
; SSE-NEXT:    minss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp olt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp olt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.

define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    maxss %xmm3, %xmm2
; SSE-NEXT:    maxss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp ogt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp ogt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.

define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    minsd %xmm3, %xmm2
; SSE-NEXT:    minsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp olt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp olt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.

define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    maxsd %xmm3, %xmm2
; SSE-NEXT:    maxsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp ogt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp ogt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.

define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    minps %xmm3, %xmm2
; SSE-NEXT:    minps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp olt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp olt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.

define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    maxps %xmm3, %xmm2
; SSE-NEXT:    maxps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp ogt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp ogt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.

define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    minpd %xmm3, %xmm2
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp olt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp olt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.

define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    maxpd %xmm3, %xmm2
; SSE-NEXT:    maxpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp ogt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp ogt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.

define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_mins_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp olt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp olt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.

define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp ogt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp ogt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.

define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_mins_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp olt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp olt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.

define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp ogt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp ogt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.
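; Both functions below should end up computing the same balanced tree,
;   (x0 + x1) + (x2 + x3):
; reassociate_adds_from_calls is rebalanced into that shape, while
; already_reassociated is left alone; the expected asm is identical for both.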

declare double @bar()

define double @reassociate_adds_from_calls() {
; AVX-LABEL: reassociate_adds_from_calls:
; AVX:       callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; AVX-LABEL: already_reassociated:
; AVX:       callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}